]> Pileus Git - ~andy/linux/blob - fs/btrfs/check-integrity.c
ASoC: atmel: sam9x5_wm8731: remove platform_set_drvdata
[~andy/linux] / fs / btrfs / check-integrity.c
1 /*
2  * Copyright (C) STRATO AG 2011.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 /*
20  * This module can be used to catch cases when the btrfs kernel
21  * code executes write requests to the disk that bring the file
22  * system into an inconsistent state. In such a state, a power-loss
23  * or kernel panic event would cause the data on disk to be
24  * lost or at least damaged.
25  *
26  * Code is added that examines all block write requests during
27  * runtime (including writes of the super block). Three rules
28  * are verified and an error is printed on violation of the
29  * rules:
30  * 1. It is not allowed to write a disk block which is
31  *    currently referenced by the super block (either directly
32  *    or indirectly).
33  * 2. When a super block is written, it is verified that all
34  *    referenced (directly or indirectly) blocks fulfill the
35  *    following requirements:
36  *    2a. All referenced blocks have either been present when
37  *        the file system was mounted, (i.e., they have been
38  *        referenced by the super block) or they have been
39  *        written since then and the write completion callback
40  *        was called and no write error was indicated and a
41  *        FLUSH request to the device where these blocks are
42  *        located was received and completed.
43  *    2b. All referenced blocks need to have a generation
44  *        number which is equal to the parent's number.
45  *
46  * One issue that was found using this module was that the log
47  * tree on disk became temporarily corrupted because disk blocks
48  * that had been in use for the log tree had been freed and
49  * reused too early, while being referenced by the written super
50  * block.
51  *
52  * The search term in the kernel log that can be used to filter
53  * on the existence of detected integrity issues is
54  * "btrfs: attempt".
55  *
56  * The integrity check is enabled via mount options. These
57  * mount options are only supported if the integrity check
58  * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
59  *
60  * Example #1, apply integrity checks to all metadata:
61  * mount /dev/sdb1 /mnt -o check_int
62  *
63  * Example #2, apply integrity checks to all metadata and
64  * to data extents:
65  * mount /dev/sdb1 /mnt -o check_int_data
66  *
67  * Example #3, apply integrity checks to all metadata and dump
68  * the tree that the super block references to kernel messages
69  * each time after a super block was written:
70  * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
71  *
72  * If the integrity check tool is included and activated in
73  * the mount options, plenty of kernel memory is used, and
74  * plenty of additional CPU cycles are spent. Enabling this
75  * functionality is not intended for normal use. In most
76  * cases, unless you are a btrfs developer who needs to verify
77  * the integrity of (super)-block write requests, do not
78  * enable the config option BTRFS_FS_CHECK_INTEGRITY to
79  * include and compile the integrity check tool.
80  *
81  * Expect millions of lines of information in the kernel log with an
82  * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
83  * kernel config to at least 26 (which is 64MB). Usually the value is
84  * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
85  * changed like this before LOG_BUF_SHIFT can be set to a high value:
86  * config LOG_BUF_SHIFT
87  *       int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
88  *       range 12 30
89  */
90
91 #include <linux/sched.h>
92 #include <linux/slab.h>
93 #include <linux/buffer_head.h>
94 #include <linux/mutex.h>
95 #include <linux/crc32c.h>
96 #include <linux/genhd.h>
97 #include <linux/blkdev.h>
98 #include "ctree.h"
99 #include "disk-io.h"
100 #include "transaction.h"
101 #include "extent_io.h"
102 #include "volumes.h"
103 #include "print-tree.h"
104 #include "locking.h"
105 #include "check-integrity.h"
106 #include "rcu-string.h"
107
/*
 * Number of buckets in each of the three hashtables. The hash functions
 * in this file reduce their value with "& (SIZE - 1)", so every size has
 * to remain a power of two.
 */
#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
/*
 * Values stored in the magic field of the corresponding structures.
 * The block, block_link and dev_state free helpers BUG_ON() a mismatch
 * before calling kfree(). Note that the stack frame magic is written as
 * a decimal literal while the other three are hexadecimal.
 */
#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6)    /* in characters,
                                                         * excluding " [...]" */
/* initial value of btrfsic_block.generation, see btrfsic_block_init() */
#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
118
/*
 * The definition of the bitmask fields for the print_mask.
 * They are specified with the mount option check_int_print_mask.
 * The bits are OR-ed together; e.g. the value 263 (0x107) used in
 * example #3 of the file header selects SUPERBLOCK_WRITE,
 * ROOT_CHUNK_LOG_TREE_LOCATION, TREE_AFTER_SB_WRITE and INITIAL_TREE.
 */
#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE                     0x00000001
#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION         0x00000002
#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE                  0x00000004
#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE                 0x00000008
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH                        0x00000010
#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH                        0x00000020
#define BTRFSIC_PRINT_MASK_VERBOSE                              0x00000040
#define BTRFSIC_PRINT_MASK_VERY_VERBOSE                         0x00000080
#define BTRFSIC_PRINT_MASK_INITIAL_TREE                         0x00000100
#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES                    0x00000200
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE                     0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES                           0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS                0x00001000
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE                0x00002000
137
138 struct btrfsic_dev_state;
139 struct btrfsic_state;
140
/*
 * Tracking record for one disk block. Objects are found in the block
 * hashtable via the pair (dev_state->bdev, dev_bytenr), see
 * btrfsic_block_hashtable_lookup().
 */
struct btrfsic_block {
        u32 magic_num;          /* only used for debug purposes */
        unsigned int is_metadata:1;     /* if it is meta-data, not data-data */
        unsigned int is_superblock:1;   /* if it is one of the superblocks */
        unsigned int is_iodone:1;       /* if is done by lower subsystem */
        unsigned int iodone_w_error:1;  /* error was indicated to endio */
        unsigned int never_written:1;   /* block was added because it was
                                         * referenced, not because it was
                                         * written */
        unsigned int mirror_num;        /* large enough to hold
                                         * BTRFS_SUPER_MIRROR_MAX */
        struct btrfsic_dev_state *dev_state;    /* device the block is on;
                                                 * its bdev is part of the
                                                 * hash key */
        u64 dev_bytenr;         /* key, physical byte num on disk */
        u64 logical_bytenr;     /* logical byte num on disk */
        u64 generation;         /* BTRFSIC_GENERATION_UNKNOWN after init */
        struct btrfs_disk_key disk_key; /* extra info to print in case of
                                         * issues, will not always be correct */
        struct list_head collision_resolving_node;      /* list node, chains
                                                         * the block into its
                                                         * hashtable bucket */
        struct list_head all_blocks_node;       /* list node */

        /* the following two lists contain block_link items */
        struct list_head ref_to_list;   /* list */
        struct list_head ref_from_list; /* list */
        struct btrfsic_block *next_in_same_bio;
        void *orig_bio_bh_private;
        /* one member per I/O flavour (bio or buffer head) */
        union {
                bio_end_io_t *bio;
                bh_end_io_t *bh;
        } orig_bio_bh_end_io;
        int submit_bio_bh_rw;
        u64 flush_gen; /* only valid if !never_written */
};
173
/*
 * Elements of this type are allocated dynamically and are required because
 * each block object can refer to, and can be referred to from, multiple
 * blocks.
 * The key to look them up in the hashtable is the dev_bytenr (together
 * with the block device) of the block referred to plus the one of the
 * block referred from.
 * The fact that they are searchable via a hashtable and that a
 * ref_cnt is maintained is not required for the btrfs integrity
 * check algorithm itself, it is only used to make the output more
 * beautiful in case that an error is detected (an error is defined
 * as a write operation to a block while that block is still referenced).
 */
struct btrfsic_block_link {
        u32 magic_num;          /* only used for debug purposes */
        u32 ref_cnt;            /* set to 1 by btrfsic_block_link_init() */
        struct list_head node_ref_to;   /* list node */
        struct list_head node_ref_from; /* list node */
        struct list_head collision_resolving_node;      /* list node, chains
                                                         * the link into its
                                                         * hashtable bucket */
        struct btrfsic_block *block_ref_to;     /* must be non-NULL while the
                                                 * link is in the hashtable */
        struct btrfsic_block *block_ref_from;   /* must be non-NULL while the
                                                 * link is in the hashtable */
        u64 parent_generation;
};
195
/*
 * State kept per block device. Objects are found in the dev_state
 * hashtable by their bdev pointer, see
 * btrfsic_dev_state_hashtable_lookup().
 */
struct btrfsic_dev_state {
        u32 magic_num;          /* only used for debug purposes */
        struct block_device *bdev;      /* hash key */
        struct btrfsic_state *state;
        struct list_head collision_resolving_node;      /* list node */
        /*
         * Embedded block object; btrfsic_dev_state_init() marks it as
         * is_iodone and points its dev_state back at this structure.
         */
        struct btrfsic_block dummy_block_for_bio_bh_flush;
        u64 last_flush_gen;
        char name[BDEVNAME_SIZE];       /* empty string after init */
};
205
/*
 * Bucket array for btrfsic_block objects; the entries of one bucket are
 * chained through btrfsic_block.collision_resolving_node.
 */
struct btrfsic_block_hashtable {
        struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
};
209
/*
 * Bucket array for btrfsic_block_link objects; the entries of one bucket
 * are chained through btrfsic_block_link.collision_resolving_node.
 */
struct btrfsic_block_link_hashtable {
        struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
};
213
/*
 * Bucket array for btrfsic_dev_state objects; the entries of one bucket
 * are chained through btrfsic_dev_state.collision_resolving_node.
 */
struct btrfsic_dev_state_hashtable {
        struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
};
217
/*
 * Location and data of one block. Judging by the prototypes above,
 * btrfsic_map_block() and btrfsic_map_superblock() fill an object of this
 * type ("block_ctx_out") and btrfsic_release_block_ctx() releases it;
 * their bodies are not part of this section.
 */
struct btrfsic_block_data_ctx {
        u64 start;              /* virtual bytenr */
        u64 dev_bytenr;         /* physical bytenr on device */
        u32 len;
        struct btrfsic_dev_state *dev;
        char **datav;
        struct page **pagev;
        void *mem_to_free;
};
227
/* This structure is used to implement recursion without occupying
 * any stack space, refer to btrfsic_process_metablock().
 * Frames link to their predecessor through the prev pointer. */
struct btrfsic_stack_frame {
        u32 magic;
        u32 nr;
        int error;
        int i;
        int limit_nesting;
        int num_copies;
        int mirror_num;
        struct btrfsic_block *block;
        struct btrfsic_block_data_ctx *block_ctx;
        struct btrfsic_block *next_block;
        struct btrfsic_block_data_ctx next_block_ctx;
        struct btrfs_header *hdr;
        struct btrfsic_stack_frame *prev;
};
245
/* Some state per mounted filesystem */
struct btrfsic_state {
        u32 print_mask;         /* OR of BTRFSIC_PRINT_MASK_* bits */
        int include_extent_data;
        int csum_size;          /* taken from the selected super block in
                                 * btrfsic_process_superblock() */
        struct list_head all_blocks_list;
        struct btrfsic_block_hashtable block_hashtable;
        struct btrfsic_block_link_hashtable block_link_hashtable;
        struct btrfs_root *root;
        u64 max_superblock_generation;
        struct btrfsic_block *latest_superblock;        /* NULL means that no
                                                         * super block was
                                                         * found */
        u32 metablock_size;
        u32 datablock_size;
};
260
261 static void btrfsic_block_init(struct btrfsic_block *b);
262 static struct btrfsic_block *btrfsic_block_alloc(void);
263 static void btrfsic_block_free(struct btrfsic_block *b);
264 static void btrfsic_block_link_init(struct btrfsic_block_link *n);
265 static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
266 static void btrfsic_block_link_free(struct btrfsic_block_link *n);
267 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
268 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
269 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
270 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
271 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
272                                         struct btrfsic_block_hashtable *h);
273 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
274 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
275                 struct block_device *bdev,
276                 u64 dev_bytenr,
277                 struct btrfsic_block_hashtable *h);
278 static void btrfsic_block_link_hashtable_init(
279                 struct btrfsic_block_link_hashtable *h);
280 static void btrfsic_block_link_hashtable_add(
281                 struct btrfsic_block_link *l,
282                 struct btrfsic_block_link_hashtable *h);
283 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
284 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
285                 struct block_device *bdev_ref_to,
286                 u64 dev_bytenr_ref_to,
287                 struct block_device *bdev_ref_from,
288                 u64 dev_bytenr_ref_from,
289                 struct btrfsic_block_link_hashtable *h);
290 static void btrfsic_dev_state_hashtable_init(
291                 struct btrfsic_dev_state_hashtable *h);
292 static void btrfsic_dev_state_hashtable_add(
293                 struct btrfsic_dev_state *ds,
294                 struct btrfsic_dev_state_hashtable *h);
295 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
296 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
297                 struct block_device *bdev,
298                 struct btrfsic_dev_state_hashtable *h);
299 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
300 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
301 static int btrfsic_process_superblock(struct btrfsic_state *state,
302                                       struct btrfs_fs_devices *fs_devices);
303 static int btrfsic_process_metablock(struct btrfsic_state *state,
304                                      struct btrfsic_block *block,
305                                      struct btrfsic_block_data_ctx *block_ctx,
306                                      int limit_nesting, int force_iodone_flag);
307 static void btrfsic_read_from_block_data(
308         struct btrfsic_block_data_ctx *block_ctx,
309         void *dst, u32 offset, size_t len);
310 static int btrfsic_create_link_to_next_block(
311                 struct btrfsic_state *state,
312                 struct btrfsic_block *block,
313                 struct btrfsic_block_data_ctx
314                 *block_ctx, u64 next_bytenr,
315                 int limit_nesting,
316                 struct btrfsic_block_data_ctx *next_block_ctx,
317                 struct btrfsic_block **next_blockp,
318                 int force_iodone_flag,
319                 int *num_copiesp, int *mirror_nump,
320                 struct btrfs_disk_key *disk_key,
321                 u64 parent_generation);
322 static int btrfsic_handle_extent_data(struct btrfsic_state *state,
323                                       struct btrfsic_block *block,
324                                       struct btrfsic_block_data_ctx *block_ctx,
325                                       u32 item_offset, int force_iodone_flag);
326 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
327                              struct btrfsic_block_data_ctx *block_ctx_out,
328                              int mirror_num);
329 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
330                                   u32 len, struct block_device *bdev,
331                                   struct btrfsic_block_data_ctx *block_ctx_out);
332 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
333 static int btrfsic_read_block(struct btrfsic_state *state,
334                               struct btrfsic_block_data_ctx *block_ctx);
335 static void btrfsic_dump_database(struct btrfsic_state *state);
336 static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
337 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
338                                      char **datav, unsigned int num_pages);
339 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
340                                           u64 dev_bytenr, char **mapped_datav,
341                                           unsigned int num_pages,
342                                           struct bio *bio, int *bio_is_patched,
343                                           struct buffer_head *bh,
344                                           int submit_bio_bh_rw);
345 static int btrfsic_process_written_superblock(
346                 struct btrfsic_state *state,
347                 struct btrfsic_block *const block,
348                 struct btrfs_super_block *const super_hdr);
349 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
350 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
351 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
352                                               const struct btrfsic_block *block,
353                                               int recursion_level);
354 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
355                                         struct btrfsic_block *const block,
356                                         int recursion_level);
357 static void btrfsic_print_add_link(const struct btrfsic_state *state,
358                                    const struct btrfsic_block_link *l);
359 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
360                                    const struct btrfsic_block_link *l);
361 static char btrfsic_get_block_type(const struct btrfsic_state *state,
362                                    const struct btrfsic_block *block);
363 static void btrfsic_dump_tree(const struct btrfsic_state *state);
364 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
365                                   const struct btrfsic_block *block,
366                                   int indent_level);
367 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
368                 struct btrfsic_state *state,
369                 struct btrfsic_block_data_ctx *next_block_ctx,
370                 struct btrfsic_block *next_block,
371                 struct btrfsic_block *from_block,
372                 u64 parent_generation);
373 static struct btrfsic_block *btrfsic_block_lookup_or_add(
374                 struct btrfsic_state *state,
375                 struct btrfsic_block_data_ctx *block_ctx,
376                 const char *additional_string,
377                 int is_metadata,
378                 int is_iodone,
379                 int never_written,
380                 int mirror_num,
381                 int *was_created);
382 static int btrfsic_process_superblock_dev_mirror(
383                 struct btrfsic_state *state,
384                 struct btrfsic_dev_state *dev_state,
385                 struct btrfs_device *device,
386                 int superblock_mirror_num,
387                 struct btrfsic_dev_state **selected_dev_state,
388                 struct btrfs_super_block *selected_super);
389 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
390                 struct block_device *bdev);
391 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
392                                            u64 bytenr,
393                                            struct btrfsic_dev_state *dev_state,
394                                            u64 dev_bytenr);
395
/*
 * File-scope state shared by all mounted filesystems.
 * NOTE(review): where btrfsic_mutex is initialized and what exactly it
 * protects is not visible in this section - confirm against the users.
 */
static struct mutex btrfsic_mutex;
static int btrfsic_is_initialized;
/* maps a struct block_device pointer to its btrfsic_dev_state */
static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
399
400
401 static void btrfsic_block_init(struct btrfsic_block *b)
402 {
403         b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
404         b->dev_state = NULL;
405         b->dev_bytenr = 0;
406         b->logical_bytenr = 0;
407         b->generation = BTRFSIC_GENERATION_UNKNOWN;
408         b->disk_key.objectid = 0;
409         b->disk_key.type = 0;
410         b->disk_key.offset = 0;
411         b->is_metadata = 0;
412         b->is_superblock = 0;
413         b->is_iodone = 0;
414         b->iodone_w_error = 0;
415         b->never_written = 0;
416         b->mirror_num = 0;
417         b->next_in_same_bio = NULL;
418         b->orig_bio_bh_private = NULL;
419         b->orig_bio_bh_end_io.bio = NULL;
420         INIT_LIST_HEAD(&b->collision_resolving_node);
421         INIT_LIST_HEAD(&b->all_blocks_node);
422         INIT_LIST_HEAD(&b->ref_to_list);
423         INIT_LIST_HEAD(&b->ref_from_list);
424         b->submit_bio_bh_rw = 0;
425         b->flush_gen = 0;
426 }
427
428 static struct btrfsic_block *btrfsic_block_alloc(void)
429 {
430         struct btrfsic_block *b;
431
432         b = kzalloc(sizeof(*b), GFP_NOFS);
433         if (NULL != b)
434                 btrfsic_block_init(b);
435
436         return b;
437 }
438
439 static void btrfsic_block_free(struct btrfsic_block *b)
440 {
441         BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
442         kfree(b);
443 }
444
445 static void btrfsic_block_link_init(struct btrfsic_block_link *l)
446 {
447         l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
448         l->ref_cnt = 1;
449         INIT_LIST_HEAD(&l->node_ref_to);
450         INIT_LIST_HEAD(&l->node_ref_from);
451         INIT_LIST_HEAD(&l->collision_resolving_node);
452         l->block_ref_to = NULL;
453         l->block_ref_from = NULL;
454 }
455
456 static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
457 {
458         struct btrfsic_block_link *l;
459
460         l = kzalloc(sizeof(*l), GFP_NOFS);
461         if (NULL != l)
462                 btrfsic_block_link_init(l);
463
464         return l;
465 }
466
467 static void btrfsic_block_link_free(struct btrfsic_block_link *l)
468 {
469         BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
470         kfree(l);
471 }
472
473 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
474 {
475         ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
476         ds->bdev = NULL;
477         ds->state = NULL;
478         ds->name[0] = '\0';
479         INIT_LIST_HEAD(&ds->collision_resolving_node);
480         ds->last_flush_gen = 0;
481         btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
482         ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
483         ds->dummy_block_for_bio_bh_flush.dev_state = ds;
484 }
485
486 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
487 {
488         struct btrfsic_dev_state *ds;
489
490         ds = kzalloc(sizeof(*ds), GFP_NOFS);
491         if (NULL != ds)
492                 btrfsic_dev_state_init(ds);
493
494         return ds;
495 }
496
497 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
498 {
499         BUG_ON(!(NULL == ds ||
500                  BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
501         kfree(ds);
502 }
503
504 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
505 {
506         int i;
507
508         for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
509                 INIT_LIST_HEAD(h->table + i);
510 }
511
512 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
513                                         struct btrfsic_block_hashtable *h)
514 {
515         const unsigned int hashval =
516             (((unsigned int)(b->dev_bytenr >> 16)) ^
517              ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
518              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
519
520         list_add(&b->collision_resolving_node, h->table + hashval);
521 }
522
523 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
524 {
525         list_del(&b->collision_resolving_node);
526 }
527
528 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
529                 struct block_device *bdev,
530                 u64 dev_bytenr,
531                 struct btrfsic_block_hashtable *h)
532 {
533         const unsigned int hashval =
534             (((unsigned int)(dev_bytenr >> 16)) ^
535              ((unsigned int)((uintptr_t)bdev))) &
536              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
537         struct list_head *elem;
538
539         list_for_each(elem, h->table + hashval) {
540                 struct btrfsic_block *const b =
541                     list_entry(elem, struct btrfsic_block,
542                                collision_resolving_node);
543
544                 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
545                         return b;
546         }
547
548         return NULL;
549 }
550
551 static void btrfsic_block_link_hashtable_init(
552                 struct btrfsic_block_link_hashtable *h)
553 {
554         int i;
555
556         for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
557                 INIT_LIST_HEAD(h->table + i);
558 }
559
560 static void btrfsic_block_link_hashtable_add(
561                 struct btrfsic_block_link *l,
562                 struct btrfsic_block_link_hashtable *h)
563 {
564         const unsigned int hashval =
565             (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
566              ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
567              ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
568              ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
569              & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
570
571         BUG_ON(NULL == l->block_ref_to);
572         BUG_ON(NULL == l->block_ref_from);
573         list_add(&l->collision_resolving_node, h->table + hashval);
574 }
575
576 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
577 {
578         list_del(&l->collision_resolving_node);
579 }
580
581 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
582                 struct block_device *bdev_ref_to,
583                 u64 dev_bytenr_ref_to,
584                 struct block_device *bdev_ref_from,
585                 u64 dev_bytenr_ref_from,
586                 struct btrfsic_block_link_hashtable *h)
587 {
588         const unsigned int hashval =
589             (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
590              ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
591              ((unsigned int)((uintptr_t)bdev_ref_to)) ^
592              ((unsigned int)((uintptr_t)bdev_ref_from))) &
593              (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
594         struct list_head *elem;
595
596         list_for_each(elem, h->table + hashval) {
597                 struct btrfsic_block_link *const l =
598                     list_entry(elem, struct btrfsic_block_link,
599                                collision_resolving_node);
600
601                 BUG_ON(NULL == l->block_ref_to);
602                 BUG_ON(NULL == l->block_ref_from);
603                 if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
604                     l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
605                     l->block_ref_from->dev_state->bdev == bdev_ref_from &&
606                     l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
607                         return l;
608         }
609
610         return NULL;
611 }
612
613 static void btrfsic_dev_state_hashtable_init(
614                 struct btrfsic_dev_state_hashtable *h)
615 {
616         int i;
617
618         for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
619                 INIT_LIST_HEAD(h->table + i);
620 }
621
622 static void btrfsic_dev_state_hashtable_add(
623                 struct btrfsic_dev_state *ds,
624                 struct btrfsic_dev_state_hashtable *h)
625 {
626         const unsigned int hashval =
627             (((unsigned int)((uintptr_t)ds->bdev)) &
628              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
629
630         list_add(&ds->collision_resolving_node, h->table + hashval);
631 }
632
633 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
634 {
635         list_del(&ds->collision_resolving_node);
636 }
637
638 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
639                 struct block_device *bdev,
640                 struct btrfsic_dev_state_hashtable *h)
641 {
642         const unsigned int hashval =
643             (((unsigned int)((uintptr_t)bdev)) &
644              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
645         struct list_head *elem;
646
647         list_for_each(elem, h->table + hashval) {
648                 struct btrfsic_dev_state *const ds =
649                     list_entry(elem, struct btrfsic_dev_state,
650                                collision_resolving_node);
651
652                 if (ds->bdev == bdev)
653                         return ds;
654         }
655
656         return NULL;
657 }
658
659 static int btrfsic_process_superblock(struct btrfsic_state *state,
660                                       struct btrfs_fs_devices *fs_devices)
661 {
662         int ret = 0;
663         struct btrfs_super_block *selected_super;
664         struct list_head *dev_head = &fs_devices->devices;
665         struct btrfs_device *device;
666         struct btrfsic_dev_state *selected_dev_state = NULL;
667         int pass;
668
669         BUG_ON(NULL == state);
670         selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
671         if (NULL == selected_super) {
672                 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
673                 return -1;
674         }
675
676         list_for_each_entry(device, dev_head, dev_list) {
677                 int i;
678                 struct btrfsic_dev_state *dev_state;
679
680                 if (!device->bdev || !device->name)
681                         continue;
682
683                 dev_state = btrfsic_dev_state_lookup(device->bdev);
684                 BUG_ON(NULL == dev_state);
685                 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
686                         ret = btrfsic_process_superblock_dev_mirror(
687                                         state, dev_state, device, i,
688                                         &selected_dev_state, selected_super);
689                         if (0 != ret && 0 == i) {
690                                 kfree(selected_super);
691                                 return ret;
692                         }
693                 }
694         }
695
696         if (NULL == state->latest_superblock) {
697                 printk(KERN_INFO "btrfsic: no superblock found!\n");
698                 kfree(selected_super);
699                 return -1;
700         }
701
702         state->csum_size = btrfs_super_csum_size(selected_super);
703
704         for (pass = 0; pass < 3; pass++) {
705                 int num_copies;
706                 int mirror_num;
707                 u64 next_bytenr;
708
709                 switch (pass) {
710                 case 0:
711                         next_bytenr = btrfs_super_root(selected_super);
712                         if (state->print_mask &
713                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
714                                 printk(KERN_INFO "root@%llu\n", next_bytenr);
715                         break;
716                 case 1:
717                         next_bytenr = btrfs_super_chunk_root(selected_super);
718                         if (state->print_mask &
719                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
720                                 printk(KERN_INFO "chunk@%llu\n", next_bytenr);
721                         break;
722                 case 2:
723                         next_bytenr = btrfs_super_log_root(selected_super);
724                         if (0 == next_bytenr)
725                                 continue;
726                         if (state->print_mask &
727                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
728                                 printk(KERN_INFO "log@%llu\n", next_bytenr);
729                         break;
730                 }
731
732                 num_copies =
733                     btrfs_num_copies(state->root->fs_info,
734                                      next_bytenr, state->metablock_size);
735                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
736                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
737                                next_bytenr, num_copies);
738
739                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
740                         struct btrfsic_block *next_block;
741                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
742                         struct btrfsic_block_link *l;
743
744                         ret = btrfsic_map_block(state, next_bytenr,
745                                                 state->metablock_size,
746                                                 &tmp_next_block_ctx,
747                                                 mirror_num);
748                         if (ret) {
749                                 printk(KERN_INFO "btrfsic:"
750                                        " btrfsic_map_block(root @%llu,"
751                                        " mirror %d) failed!\n",
752                                        next_bytenr, mirror_num);
753                                 kfree(selected_super);
754                                 return -1;
755                         }
756
757                         next_block = btrfsic_block_hashtable_lookup(
758                                         tmp_next_block_ctx.dev->bdev,
759                                         tmp_next_block_ctx.dev_bytenr,
760                                         &state->block_hashtable);
761                         BUG_ON(NULL == next_block);
762
763                         l = btrfsic_block_link_hashtable_lookup(
764                                         tmp_next_block_ctx.dev->bdev,
765                                         tmp_next_block_ctx.dev_bytenr,
766                                         state->latest_superblock->dev_state->
767                                         bdev,
768                                         state->latest_superblock->dev_bytenr,
769                                         &state->block_link_hashtable);
770                         BUG_ON(NULL == l);
771
772                         ret = btrfsic_read_block(state, &tmp_next_block_ctx);
773                         if (ret < (int)PAGE_CACHE_SIZE) {
774                                 printk(KERN_INFO
775                                        "btrfsic: read @logical %llu failed!\n",
776                                        tmp_next_block_ctx.start);
777                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
778                                 kfree(selected_super);
779                                 return -1;
780                         }
781
782                         ret = btrfsic_process_metablock(state,
783                                                         next_block,
784                                                         &tmp_next_block_ctx,
785                                                         BTRFS_MAX_LEVEL + 3, 1);
786                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
787                 }
788         }
789
790         kfree(selected_super);
791         return ret;
792 }
793
794 static int btrfsic_process_superblock_dev_mirror(
795                 struct btrfsic_state *state,
796                 struct btrfsic_dev_state *dev_state,
797                 struct btrfs_device *device,
798                 int superblock_mirror_num,
799                 struct btrfsic_dev_state **selected_dev_state,
800                 struct btrfs_super_block *selected_super)
801 {
802         struct btrfs_super_block *super_tmp;
803         u64 dev_bytenr;
804         struct buffer_head *bh;
805         struct btrfsic_block *superblock_tmp;
806         int pass;
807         struct block_device *const superblock_bdev = device->bdev;
808
809         /* super block bytenr is always the unmapped device bytenr */
810         dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
811         if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
812                 return -1;
813         bh = __bread(superblock_bdev, dev_bytenr / 4096,
814                      BTRFS_SUPER_INFO_SIZE);
815         if (NULL == bh)
816                 return -1;
817         super_tmp = (struct btrfs_super_block *)
818             (bh->b_data + (dev_bytenr & 4095));
819
820         if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
821             btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
822             memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
823             btrfs_super_nodesize(super_tmp) != state->metablock_size ||
824             btrfs_super_leafsize(super_tmp) != state->metablock_size ||
825             btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
826                 brelse(bh);
827                 return 0;
828         }
829
830         superblock_tmp =
831             btrfsic_block_hashtable_lookup(superblock_bdev,
832                                            dev_bytenr,
833                                            &state->block_hashtable);
834         if (NULL == superblock_tmp) {
835                 superblock_tmp = btrfsic_block_alloc();
836                 if (NULL == superblock_tmp) {
837                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
838                         brelse(bh);
839                         return -1;
840                 }
841                 /* for superblock, only the dev_bytenr makes sense */
842                 superblock_tmp->dev_bytenr = dev_bytenr;
843                 superblock_tmp->dev_state = dev_state;
844                 superblock_tmp->logical_bytenr = dev_bytenr;
845                 superblock_tmp->generation = btrfs_super_generation(super_tmp);
846                 superblock_tmp->is_metadata = 1;
847                 superblock_tmp->is_superblock = 1;
848                 superblock_tmp->is_iodone = 1;
849                 superblock_tmp->never_written = 0;
850                 superblock_tmp->mirror_num = 1 + superblock_mirror_num;
851                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
852                         printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
853                                      " @%llu (%s/%llu/%d)\n",
854                                      superblock_bdev,
855                                      rcu_str_deref(device->name), dev_bytenr,
856                                      dev_state->name, dev_bytenr,
857                                      superblock_mirror_num);
858                 list_add(&superblock_tmp->all_blocks_node,
859                          &state->all_blocks_list);
860                 btrfsic_block_hashtable_add(superblock_tmp,
861                                             &state->block_hashtable);
862         }
863
864         /* select the one with the highest generation field */
865         if (btrfs_super_generation(super_tmp) >
866             state->max_superblock_generation ||
867             0 == state->max_superblock_generation) {
868                 memcpy(selected_super, super_tmp, sizeof(*selected_super));
869                 *selected_dev_state = dev_state;
870                 state->max_superblock_generation =
871                     btrfs_super_generation(super_tmp);
872                 state->latest_superblock = superblock_tmp;
873         }
874
875         for (pass = 0; pass < 3; pass++) {
876                 u64 next_bytenr;
877                 int num_copies;
878                 int mirror_num;
879                 const char *additional_string = NULL;
880                 struct btrfs_disk_key tmp_disk_key;
881
882                 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
883                 tmp_disk_key.offset = 0;
884                 switch (pass) {
885                 case 0:
886                         btrfs_set_disk_key_objectid(&tmp_disk_key,
887                                                     BTRFS_ROOT_TREE_OBJECTID);
888                         additional_string = "initial root ";
889                         next_bytenr = btrfs_super_root(super_tmp);
890                         break;
891                 case 1:
892                         btrfs_set_disk_key_objectid(&tmp_disk_key,
893                                                     BTRFS_CHUNK_TREE_OBJECTID);
894                         additional_string = "initial chunk ";
895                         next_bytenr = btrfs_super_chunk_root(super_tmp);
896                         break;
897                 case 2:
898                         btrfs_set_disk_key_objectid(&tmp_disk_key,
899                                                     BTRFS_TREE_LOG_OBJECTID);
900                         additional_string = "initial log ";
901                         next_bytenr = btrfs_super_log_root(super_tmp);
902                         if (0 == next_bytenr)
903                                 continue;
904                         break;
905                 }
906
907                 num_copies =
908                     btrfs_num_copies(state->root->fs_info,
909                                      next_bytenr, state->metablock_size);
910                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
911                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
912                                next_bytenr, num_copies);
913                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
914                         struct btrfsic_block *next_block;
915                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
916                         struct btrfsic_block_link *l;
917
918                         if (btrfsic_map_block(state, next_bytenr,
919                                               state->metablock_size,
920                                               &tmp_next_block_ctx,
921                                               mirror_num)) {
922                                 printk(KERN_INFO "btrfsic: btrfsic_map_block("
923                                        "bytenr @%llu, mirror %d) failed!\n",
924                                        next_bytenr, mirror_num);
925                                 brelse(bh);
926                                 return -1;
927                         }
928
929                         next_block = btrfsic_block_lookup_or_add(
930                                         state, &tmp_next_block_ctx,
931                                         additional_string, 1, 1, 0,
932                                         mirror_num, NULL);
933                         if (NULL == next_block) {
934                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
935                                 brelse(bh);
936                                 return -1;
937                         }
938
939                         next_block->disk_key = tmp_disk_key;
940                         next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
941                         l = btrfsic_block_link_lookup_or_add(
942                                         state, &tmp_next_block_ctx,
943                                         next_block, superblock_tmp,
944                                         BTRFSIC_GENERATION_UNKNOWN);
945                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
946                         if (NULL == l) {
947                                 brelse(bh);
948                                 return -1;
949                         }
950                 }
951         }
952         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
953                 btrfsic_dump_tree_sub(state, superblock_tmp, 0);
954
955         brelse(bh);
956         return 0;
957 }
958
959 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
960 {
961         struct btrfsic_stack_frame *sf;
962
963         sf = kzalloc(sizeof(*sf), GFP_NOFS);
964         if (NULL == sf)
965                 printk(KERN_INFO "btrfsic: alloc memory failed!\n");
966         else
967                 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
968         return sf;
969 }
970
971 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
972 {
973         BUG_ON(!(NULL == sf ||
974                  BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
975         kfree(sf);
976 }
977
978 static int btrfsic_process_metablock(
979                 struct btrfsic_state *state,
980                 struct btrfsic_block *const first_block,
981                 struct btrfsic_block_data_ctx *const first_block_ctx,
982                 int first_limit_nesting, int force_iodone_flag)
983 {
984         struct btrfsic_stack_frame initial_stack_frame = { 0 };
985         struct btrfsic_stack_frame *sf;
986         struct btrfsic_stack_frame *next_stack;
987         struct btrfs_header *const first_hdr =
988                 (struct btrfs_header *)first_block_ctx->datav[0];
989
990         BUG_ON(!first_hdr);
991         sf = &initial_stack_frame;
992         sf->error = 0;
993         sf->i = -1;
994         sf->limit_nesting = first_limit_nesting;
995         sf->block = first_block;
996         sf->block_ctx = first_block_ctx;
997         sf->next_block = NULL;
998         sf->hdr = first_hdr;
999         sf->prev = NULL;
1000
1001 continue_with_new_stack_frame:
1002         sf->block->generation = le64_to_cpu(sf->hdr->generation);
1003         if (0 == sf->hdr->level) {
1004                 struct btrfs_leaf *const leafhdr =
1005                     (struct btrfs_leaf *)sf->hdr;
1006
1007                 if (-1 == sf->i) {
1008                         sf->nr = btrfs_stack_header_nritems(&leafhdr->header);
1009
1010                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1011                                 printk(KERN_INFO
1012                                        "leaf %llu items %d generation %llu"
1013                                        " owner %llu\n",
1014                                        sf->block_ctx->start, sf->nr,
1015                                        btrfs_stack_header_generation(
1016                                                &leafhdr->header),
1017                                        btrfs_stack_header_owner(
1018                                                &leafhdr->header));
1019                 }
1020
1021 continue_with_current_leaf_stack_frame:
1022                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1023                         sf->i++;
1024                         sf->num_copies = 0;
1025                 }
1026
1027                 if (sf->i < sf->nr) {
1028                         struct btrfs_item disk_item;
1029                         u32 disk_item_offset =
1030                                 (uintptr_t)(leafhdr->items + sf->i) -
1031                                 (uintptr_t)leafhdr;
1032                         struct btrfs_disk_key *disk_key;
1033                         u8 type;
1034                         u32 item_offset;
1035                         u32 item_size;
1036
1037                         if (disk_item_offset + sizeof(struct btrfs_item) >
1038                             sf->block_ctx->len) {
1039 leaf_item_out_of_bounce_error:
1040                                 printk(KERN_INFO
1041                                        "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1042                                        sf->block_ctx->start,
1043                                        sf->block_ctx->dev->name);
1044                                 goto one_stack_frame_backwards;
1045                         }
1046                         btrfsic_read_from_block_data(sf->block_ctx,
1047                                                      &disk_item,
1048                                                      disk_item_offset,
1049                                                      sizeof(struct btrfs_item));
1050                         item_offset = btrfs_stack_item_offset(&disk_item);
1051                         item_size = btrfs_stack_item_size(&disk_item);
1052                         disk_key = &disk_item.key;
1053                         type = btrfs_disk_key_type(disk_key);
1054
1055                         if (BTRFS_ROOT_ITEM_KEY == type) {
1056                                 struct btrfs_root_item root_item;
1057                                 u32 root_item_offset;
1058                                 u64 next_bytenr;
1059
1060                                 root_item_offset = item_offset +
1061                                         offsetof(struct btrfs_leaf, items);
1062                                 if (root_item_offset + item_size >
1063                                     sf->block_ctx->len)
1064                                         goto leaf_item_out_of_bounce_error;
1065                                 btrfsic_read_from_block_data(
1066                                         sf->block_ctx, &root_item,
1067                                         root_item_offset,
1068                                         item_size);
1069                                 next_bytenr = btrfs_root_bytenr(&root_item);
1070
1071                                 sf->error =
1072                                     btrfsic_create_link_to_next_block(
1073                                                 state,
1074                                                 sf->block,
1075                                                 sf->block_ctx,
1076                                                 next_bytenr,
1077                                                 sf->limit_nesting,
1078                                                 &sf->next_block_ctx,
1079                                                 &sf->next_block,
1080                                                 force_iodone_flag,
1081                                                 &sf->num_copies,
1082                                                 &sf->mirror_num,
1083                                                 disk_key,
1084                                                 btrfs_root_generation(
1085                                                 &root_item));
1086                                 if (sf->error)
1087                                         goto one_stack_frame_backwards;
1088
1089                                 if (NULL != sf->next_block) {
1090                                         struct btrfs_header *const next_hdr =
1091                                             (struct btrfs_header *)
1092                                             sf->next_block_ctx.datav[0];
1093
1094                                         next_stack =
1095                                             btrfsic_stack_frame_alloc();
1096                                         if (NULL == next_stack) {
1097                                                 btrfsic_release_block_ctx(
1098                                                                 &sf->
1099                                                                 next_block_ctx);
1100                                                 goto one_stack_frame_backwards;
1101                                         }
1102
1103                                         next_stack->i = -1;
1104                                         next_stack->block = sf->next_block;
1105                                         next_stack->block_ctx =
1106                                             &sf->next_block_ctx;
1107                                         next_stack->next_block = NULL;
1108                                         next_stack->hdr = next_hdr;
1109                                         next_stack->limit_nesting =
1110                                             sf->limit_nesting - 1;
1111                                         next_stack->prev = sf;
1112                                         sf = next_stack;
1113                                         goto continue_with_new_stack_frame;
1114                                 }
1115                         } else if (BTRFS_EXTENT_DATA_KEY == type &&
1116                                    state->include_extent_data) {
1117                                 sf->error = btrfsic_handle_extent_data(
1118                                                 state,
1119                                                 sf->block,
1120                                                 sf->block_ctx,
1121                                                 item_offset,
1122                                                 force_iodone_flag);
1123                                 if (sf->error)
1124                                         goto one_stack_frame_backwards;
1125                         }
1126
1127                         goto continue_with_current_leaf_stack_frame;
1128                 }
1129         } else {
1130                 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
1131
1132                 if (-1 == sf->i) {
1133                         sf->nr = btrfs_stack_header_nritems(&nodehdr->header);
1134
1135                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1136                                 printk(KERN_INFO "node %llu level %d items %d"
1137                                        " generation %llu owner %llu\n",
1138                                        sf->block_ctx->start,
1139                                        nodehdr->header.level, sf->nr,
1140                                        btrfs_stack_header_generation(
1141                                        &nodehdr->header),
1142                                        btrfs_stack_header_owner(
1143                                        &nodehdr->header));
1144                 }
1145
1146 continue_with_current_node_stack_frame:
1147                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1148                         sf->i++;
1149                         sf->num_copies = 0;
1150                 }
1151
1152                 if (sf->i < sf->nr) {
1153                         struct btrfs_key_ptr key_ptr;
1154                         u32 key_ptr_offset;
1155                         u64 next_bytenr;
1156
1157                         key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
1158                                           (uintptr_t)nodehdr;
1159                         if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
1160                             sf->block_ctx->len) {
1161                                 printk(KERN_INFO
1162                                        "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1163                                        sf->block_ctx->start,
1164                                        sf->block_ctx->dev->name);
1165                                 goto one_stack_frame_backwards;
1166                         }
1167                         btrfsic_read_from_block_data(
1168                                 sf->block_ctx, &key_ptr, key_ptr_offset,
1169                                 sizeof(struct btrfs_key_ptr));
1170                         next_bytenr = btrfs_stack_key_blockptr(&key_ptr);
1171
1172                         sf->error = btrfsic_create_link_to_next_block(
1173                                         state,
1174                                         sf->block,
1175                                         sf->block_ctx,
1176                                         next_bytenr,
1177                                         sf->limit_nesting,
1178                                         &sf->next_block_ctx,
1179                                         &sf->next_block,
1180                                         force_iodone_flag,
1181                                         &sf->num_copies,
1182                                         &sf->mirror_num,
1183                                         &key_ptr.key,
1184                                         btrfs_stack_key_generation(&key_ptr));
1185                         if (sf->error)
1186                                 goto one_stack_frame_backwards;
1187
1188                         if (NULL != sf->next_block) {
1189                                 struct btrfs_header *const next_hdr =
1190                                     (struct btrfs_header *)
1191                                     sf->next_block_ctx.datav[0];
1192
1193                                 next_stack = btrfsic_stack_frame_alloc();
1194                                 if (NULL == next_stack)
1195                                         goto one_stack_frame_backwards;
1196
1197                                 next_stack->i = -1;
1198                                 next_stack->block = sf->next_block;
1199                                 next_stack->block_ctx = &sf->next_block_ctx;
1200                                 next_stack->next_block = NULL;
1201                                 next_stack->hdr = next_hdr;
1202                                 next_stack->limit_nesting =
1203                                     sf->limit_nesting - 1;
1204                                 next_stack->prev = sf;
1205                                 sf = next_stack;
1206                                 goto continue_with_new_stack_frame;
1207                         }
1208
1209                         goto continue_with_current_node_stack_frame;
1210                 }
1211         }
1212
1213 one_stack_frame_backwards:
1214         if (NULL != sf->prev) {
1215                 struct btrfsic_stack_frame *const prev = sf->prev;
1216
1217                 /* the one for the initial block is freed in the caller */
1218                 btrfsic_release_block_ctx(sf->block_ctx);
1219
1220                 if (sf->error) {
1221                         prev->error = sf->error;
1222                         btrfsic_stack_frame_free(sf);
1223                         sf = prev;
1224                         goto one_stack_frame_backwards;
1225                 }
1226
1227                 btrfsic_stack_frame_free(sf);
1228                 sf = prev;
1229                 goto continue_with_new_stack_frame;
1230         } else {
1231                 BUG_ON(&initial_stack_frame != sf);
1232         }
1233
1234         return sf->error;
1235 }
1236
1237 static void btrfsic_read_from_block_data(
1238         struct btrfsic_block_data_ctx *block_ctx,
1239         void *dstv, u32 offset, size_t len)
1240 {
1241         size_t cur;
1242         size_t offset_in_page;
1243         char *kaddr;
1244         char *dst = (char *)dstv;
1245         size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1246         unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1247
1248         WARN_ON(offset + len > block_ctx->len);
1249         offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1);
1250
1251         while (len > 0) {
1252                 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
1253                 BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
1254                             PAGE_CACHE_SHIFT);
1255                 kaddr = block_ctx->datav[i];
1256                 memcpy(dst, kaddr + offset_in_page, cur);
1257
1258                 dst += cur;
1259                 len -= cur;
1260                 offset_in_page = 0;
1261                 i++;
1262         }
1263 }
1264
1265 static int btrfsic_create_link_to_next_block(
1266                 struct btrfsic_state *state,
1267                 struct btrfsic_block *block,
1268                 struct btrfsic_block_data_ctx *block_ctx,
1269                 u64 next_bytenr,
1270                 int limit_nesting,
1271                 struct btrfsic_block_data_ctx *next_block_ctx,
1272                 struct btrfsic_block **next_blockp,
1273                 int force_iodone_flag,
1274                 int *num_copiesp, int *mirror_nump,
1275                 struct btrfs_disk_key *disk_key,
1276                 u64 parent_generation)
1277 {
1278         struct btrfsic_block *next_block = NULL;
1279         int ret;
1280         struct btrfsic_block_link *l;
1281         int did_alloc_block_link;
1282         int block_was_created;
1283
1284         *next_blockp = NULL;
1285         if (0 == *num_copiesp) {
1286                 *num_copiesp =
1287                     btrfs_num_copies(state->root->fs_info,
1288                                      next_bytenr, state->metablock_size);
1289                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1290                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1291                                next_bytenr, *num_copiesp);
1292                 *mirror_nump = 1;
1293         }
1294
1295         if (*mirror_nump > *num_copiesp)
1296                 return 0;
1297
1298         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1299                 printk(KERN_INFO
1300                        "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1301                        *mirror_nump);
1302         ret = btrfsic_map_block(state, next_bytenr,
1303                                 state->metablock_size,
1304                                 next_block_ctx, *mirror_nump);
1305         if (ret) {
1306                 printk(KERN_INFO
1307                        "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1308                        next_bytenr, *mirror_nump);
1309                 btrfsic_release_block_ctx(next_block_ctx);
1310                 *next_blockp = NULL;
1311                 return -1;
1312         }
1313
1314         next_block = btrfsic_block_lookup_or_add(state,
1315                                                  next_block_ctx, "referenced ",
1316                                                  1, force_iodone_flag,
1317                                                  !force_iodone_flag,
1318                                                  *mirror_nump,
1319                                                  &block_was_created);
1320         if (NULL == next_block) {
1321                 btrfsic_release_block_ctx(next_block_ctx);
1322                 *next_blockp = NULL;
1323                 return -1;
1324         }
1325         if (block_was_created) {
1326                 l = NULL;
1327                 next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
1328         } else {
1329                 if (next_block->logical_bytenr != next_bytenr &&
1330                     !(!next_block->is_metadata &&
1331                       0 == next_block->logical_bytenr)) {
1332                         printk(KERN_INFO
1333                                "Referenced block @%llu (%s/%llu/%d)"
1334                                " found in hash table, %c,"
1335                                " bytenr mismatch (!= stored %llu).\n",
1336                                next_bytenr, next_block_ctx->dev->name,
1337                                next_block_ctx->dev_bytenr, *mirror_nump,
1338                                btrfsic_get_block_type(state, next_block),
1339                                next_block->logical_bytenr);
1340                 } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1341                         printk(KERN_INFO
1342                                "Referenced block @%llu (%s/%llu/%d)"
1343                                " found in hash table, %c.\n",
1344                                next_bytenr, next_block_ctx->dev->name,
1345                                next_block_ctx->dev_bytenr, *mirror_nump,
1346                                btrfsic_get_block_type(state, next_block));
1347                 next_block->logical_bytenr = next_bytenr;
1348
1349                 next_block->mirror_num = *mirror_nump;
1350                 l = btrfsic_block_link_hashtable_lookup(
1351                                 next_block_ctx->dev->bdev,
1352                                 next_block_ctx->dev_bytenr,
1353                                 block_ctx->dev->bdev,
1354                                 block_ctx->dev_bytenr,
1355                                 &state->block_link_hashtable);
1356         }
1357
1358         next_block->disk_key = *disk_key;
1359         if (NULL == l) {
1360                 l = btrfsic_block_link_alloc();
1361                 if (NULL == l) {
1362                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
1363                         btrfsic_release_block_ctx(next_block_ctx);
1364                         *next_blockp = NULL;
1365                         return -1;
1366                 }
1367
1368                 did_alloc_block_link = 1;
1369                 l->block_ref_to = next_block;
1370                 l->block_ref_from = block;
1371                 l->ref_cnt = 1;
1372                 l->parent_generation = parent_generation;
1373
1374                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1375                         btrfsic_print_add_link(state, l);
1376
1377                 list_add(&l->node_ref_to, &block->ref_to_list);
1378                 list_add(&l->node_ref_from, &next_block->ref_from_list);
1379
1380                 btrfsic_block_link_hashtable_add(l,
1381                                                  &state->block_link_hashtable);
1382         } else {
1383                 did_alloc_block_link = 0;
1384                 if (0 == limit_nesting) {
1385                         l->ref_cnt++;
1386                         l->parent_generation = parent_generation;
1387                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1388                                 btrfsic_print_add_link(state, l);
1389                 }
1390         }
1391
1392         if (limit_nesting > 0 && did_alloc_block_link) {
1393                 ret = btrfsic_read_block(state, next_block_ctx);
1394                 if (ret < (int)next_block_ctx->len) {
1395                         printk(KERN_INFO
1396                                "btrfsic: read block @logical %llu failed!\n",
1397                                next_bytenr);
1398                         btrfsic_release_block_ctx(next_block_ctx);
1399                         *next_blockp = NULL;
1400                         return -1;
1401                 }
1402
1403                 *next_blockp = next_block;
1404         } else {
1405                 *next_blockp = NULL;
1406         }
1407         (*mirror_nump)++;
1408
1409         return 0;
1410 }
1411
1412 static int btrfsic_handle_extent_data(
1413                 struct btrfsic_state *state,
1414                 struct btrfsic_block *block,
1415                 struct btrfsic_block_data_ctx *block_ctx,
1416                 u32 item_offset, int force_iodone_flag)
1417 {
1418         int ret;
1419         struct btrfs_file_extent_item file_extent_item;
1420         u64 file_extent_item_offset;
1421         u64 next_bytenr;
1422         u64 num_bytes;
1423         u64 generation;
1424         struct btrfsic_block_link *l;
1425
1426         file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1427                                   item_offset;
1428         if (file_extent_item_offset +
1429             offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
1430             block_ctx->len) {
1431                 printk(KERN_INFO
1432                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1433                        block_ctx->start, block_ctx->dev->name);
1434                 return -1;
1435         }
1436
1437         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1438                 file_extent_item_offset,
1439                 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1440         if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1441             btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) {
1442                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1443                         printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1444                                file_extent_item.type,
1445                                btrfs_stack_file_extent_disk_bytenr(
1446                                &file_extent_item));
1447                 return 0;
1448         }
1449
1450         if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1451             block_ctx->len) {
1452                 printk(KERN_INFO
1453                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1454                        block_ctx->start, block_ctx->dev->name);
1455                 return -1;
1456         }
1457         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1458                                      file_extent_item_offset,
1459                                      sizeof(struct btrfs_file_extent_item));
1460         next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) +
1461                       btrfs_stack_file_extent_offset(&file_extent_item);
1462         generation = btrfs_stack_file_extent_generation(&file_extent_item);
1463         num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
1464         generation = btrfs_stack_file_extent_generation(&file_extent_item);
1465
1466         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1467                 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1468                        " offset = %llu, num_bytes = %llu\n",
1469                        file_extent_item.type,
1470                        btrfs_stack_file_extent_disk_bytenr(&file_extent_item),
1471                        btrfs_stack_file_extent_offset(&file_extent_item),
1472                        num_bytes);
1473         while (num_bytes > 0) {
1474                 u32 chunk_len;
1475                 int num_copies;
1476                 int mirror_num;
1477
1478                 if (num_bytes > state->datablock_size)
1479                         chunk_len = state->datablock_size;
1480                 else
1481                         chunk_len = num_bytes;
1482
1483                 num_copies =
1484                     btrfs_num_copies(state->root->fs_info,
1485                                      next_bytenr, state->datablock_size);
1486                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1487                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1488                                next_bytenr, num_copies);
1489                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1490                         struct btrfsic_block_data_ctx next_block_ctx;
1491                         struct btrfsic_block *next_block;
1492                         int block_was_created;
1493
1494                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1495                                 printk(KERN_INFO "btrfsic_handle_extent_data("
1496                                        "mirror_num=%d)\n", mirror_num);
1497                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1498                                 printk(KERN_INFO
1499                                        "\tdisk_bytenr = %llu, num_bytes %u\n",
1500                                        next_bytenr, chunk_len);
1501                         ret = btrfsic_map_block(state, next_bytenr,
1502                                                 chunk_len, &next_block_ctx,
1503                                                 mirror_num);
1504                         if (ret) {
1505                                 printk(KERN_INFO
1506                                        "btrfsic: btrfsic_map_block(@%llu,"
1507                                        " mirror=%d) failed!\n",
1508                                        next_bytenr, mirror_num);
1509                                 return -1;
1510                         }
1511
1512                         next_block = btrfsic_block_lookup_or_add(
1513                                         state,
1514                                         &next_block_ctx,
1515                                         "referenced ",
1516                                         0,
1517                                         force_iodone_flag,
1518                                         !force_iodone_flag,
1519                                         mirror_num,
1520                                         &block_was_created);
1521                         if (NULL == next_block) {
1522                                 printk(KERN_INFO
1523                                        "btrfsic: error, kmalloc failed!\n");
1524                                 btrfsic_release_block_ctx(&next_block_ctx);
1525                                 return -1;
1526                         }
1527                         if (!block_was_created) {
1528                                 if (next_block->logical_bytenr != next_bytenr &&
1529                                     !(!next_block->is_metadata &&
1530                                       0 == next_block->logical_bytenr)) {
1531                                         printk(KERN_INFO
1532                                                "Referenced block"
1533                                                " @%llu (%s/%llu/%d)"
1534                                                " found in hash table, D,"
1535                                                " bytenr mismatch"
1536                                                " (!= stored %llu).\n",
1537                                                next_bytenr,
1538                                                next_block_ctx.dev->name,
1539                                                next_block_ctx.dev_bytenr,
1540                                                mirror_num,
1541                                                next_block->logical_bytenr);
1542                                 }
1543                                 next_block->logical_bytenr = next_bytenr;
1544                                 next_block->mirror_num = mirror_num;
1545                         }
1546
1547                         l = btrfsic_block_link_lookup_or_add(state,
1548                                                              &next_block_ctx,
1549                                                              next_block, block,
1550                                                              generation);
1551                         btrfsic_release_block_ctx(&next_block_ctx);
1552                         if (NULL == l)
1553                                 return -1;
1554                 }
1555
1556                 next_bytenr += chunk_len;
1557                 num_bytes -= chunk_len;
1558         }
1559
1560         return 0;
1561 }
1562
1563 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1564                              struct btrfsic_block_data_ctx *block_ctx_out,
1565                              int mirror_num)
1566 {
1567         int ret;
1568         u64 length;
1569         struct btrfs_bio *multi = NULL;
1570         struct btrfs_device *device;
1571
1572         length = len;
1573         ret = btrfs_map_block(state->root->fs_info, READ,
1574                               bytenr, &length, &multi, mirror_num);
1575
1576         if (ret) {
1577                 block_ctx_out->start = 0;
1578                 block_ctx_out->dev_bytenr = 0;
1579                 block_ctx_out->len = 0;
1580                 block_ctx_out->dev = NULL;
1581                 block_ctx_out->datav = NULL;
1582                 block_ctx_out->pagev = NULL;
1583                 block_ctx_out->mem_to_free = NULL;
1584
1585                 return ret;
1586         }
1587
1588         device = multi->stripes[0].dev;
1589         block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
1590         block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1591         block_ctx_out->start = bytenr;
1592         block_ctx_out->len = len;
1593         block_ctx_out->datav = NULL;
1594         block_ctx_out->pagev = NULL;
1595         block_ctx_out->mem_to_free = NULL;
1596
1597         kfree(multi);
1598         if (NULL == block_ctx_out->dev) {
1599                 ret = -ENXIO;
1600                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
1601         }
1602
1603         return ret;
1604 }
1605
1606 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
1607                                   u32 len, struct block_device *bdev,
1608                                   struct btrfsic_block_data_ctx *block_ctx_out)
1609 {
1610         block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
1611         block_ctx_out->dev_bytenr = bytenr;
1612         block_ctx_out->start = bytenr;
1613         block_ctx_out->len = len;
1614         block_ctx_out->datav = NULL;
1615         block_ctx_out->pagev = NULL;
1616         block_ctx_out->mem_to_free = NULL;
1617         if (NULL != block_ctx_out->dev) {
1618                 return 0;
1619         } else {
1620                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
1621                 return -ENXIO;
1622         }
1623 }
1624
1625 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1626 {
1627         if (block_ctx->mem_to_free) {
1628                 unsigned int num_pages;
1629
1630                 BUG_ON(!block_ctx->datav);
1631                 BUG_ON(!block_ctx->pagev);
1632                 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1633                             PAGE_CACHE_SHIFT;
1634                 while (num_pages > 0) {
1635                         num_pages--;
1636                         if (block_ctx->datav[num_pages]) {
1637                                 kunmap(block_ctx->pagev[num_pages]);
1638                                 block_ctx->datav[num_pages] = NULL;
1639                         }
1640                         if (block_ctx->pagev[num_pages]) {
1641                                 __free_page(block_ctx->pagev[num_pages]);
1642                                 block_ctx->pagev[num_pages] = NULL;
1643                         }
1644                 }
1645
1646                 kfree(block_ctx->mem_to_free);
1647                 block_ctx->mem_to_free = NULL;
1648                 block_ctx->pagev = NULL;
1649                 block_ctx->datav = NULL;
1650         }
1651 }
1652
1653 static int btrfsic_read_block(struct btrfsic_state *state,
1654                               struct btrfsic_block_data_ctx *block_ctx)
1655 {
1656         unsigned int num_pages;
1657         unsigned int i;
1658         u64 dev_bytenr;
1659         int ret;
1660
1661         BUG_ON(block_ctx->datav);
1662         BUG_ON(block_ctx->pagev);
1663         BUG_ON(block_ctx->mem_to_free);
1664         if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
1665                 printk(KERN_INFO
1666                        "btrfsic: read_block() with unaligned bytenr %llu\n",
1667                        block_ctx->dev_bytenr);
1668                 return -1;
1669         }
1670
1671         num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1672                     PAGE_CACHE_SHIFT;
1673         block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1674                                           sizeof(*block_ctx->pagev)) *
1675                                          num_pages, GFP_NOFS);
1676         if (!block_ctx->mem_to_free)
1677                 return -1;
1678         block_ctx->datav = block_ctx->mem_to_free;
1679         block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1680         for (i = 0; i < num_pages; i++) {
1681                 block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1682                 if (!block_ctx->pagev[i])
1683                         return -1;
1684         }
1685
1686         dev_bytenr = block_ctx->dev_bytenr;
1687         for (i = 0; i < num_pages;) {
1688                 struct bio *bio;
1689                 unsigned int j;
1690                 DECLARE_COMPLETION_ONSTACK(complete);
1691
1692                 bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
1693                 if (!bio) {
1694                         printk(KERN_INFO
1695                                "btrfsic: bio_alloc() for %u pages failed!\n",
1696                                num_pages - i);
1697                         return -1;
1698                 }
1699                 bio->bi_bdev = block_ctx->dev->bdev;
1700                 bio->bi_sector = dev_bytenr >> 9;
1701                 bio->bi_end_io = btrfsic_complete_bio_end_io;
1702                 bio->bi_private = &complete;
1703
1704                 for (j = i; j < num_pages; j++) {
1705                         ret = bio_add_page(bio, block_ctx->pagev[j],
1706                                            PAGE_CACHE_SIZE, 0);
1707                         if (PAGE_CACHE_SIZE != ret)
1708                                 break;
1709                 }
1710                 if (j == i) {
1711                         printk(KERN_INFO
1712                                "btrfsic: error, failed to add a single page!\n");
1713                         return -1;
1714                 }
1715                 submit_bio(READ, bio);
1716
1717                 /* this will also unplug the queue */
1718                 wait_for_completion(&complete);
1719
1720                 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
1721                         printk(KERN_INFO
1722                                "btrfsic: read error at logical %llu dev %s!\n",
1723                                block_ctx->start, block_ctx->dev->name);
1724                         bio_put(bio);
1725                         return -1;
1726                 }
1727                 bio_put(bio);
1728                 dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
1729                 i = j;
1730         }
1731         for (i = 0; i < num_pages; i++) {
1732                 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1733                 if (!block_ctx->datav[i]) {
1734                         printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1735                                block_ctx->dev->name);
1736                         return -1;
1737                 }
1738         }
1739
1740         return block_ctx->len;
1741 }
1742
1743 static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
1744 {
1745         complete((struct completion *)bio->bi_private);
1746 }
1747
1748 static void btrfsic_dump_database(struct btrfsic_state *state)
1749 {
1750         struct list_head *elem_all;
1751
1752         BUG_ON(NULL == state);
1753
1754         printk(KERN_INFO "all_blocks_list:\n");
1755         list_for_each(elem_all, &state->all_blocks_list) {
1756                 const struct btrfsic_block *const b_all =
1757                     list_entry(elem_all, struct btrfsic_block,
1758                                all_blocks_node);
1759                 struct list_head *elem_ref_to;
1760                 struct list_head *elem_ref_from;
1761
1762                 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
1763                        btrfsic_get_block_type(state, b_all),
1764                        b_all->logical_bytenr, b_all->dev_state->name,
1765                        b_all->dev_bytenr, b_all->mirror_num);
1766
1767                 list_for_each(elem_ref_to, &b_all->ref_to_list) {
1768                         const struct btrfsic_block_link *const l =
1769                             list_entry(elem_ref_to,
1770                                        struct btrfsic_block_link,
1771                                        node_ref_to);
1772
1773                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1774                                " refers %u* to"
1775                                " %c @%llu (%s/%llu/%d)\n",
1776                                btrfsic_get_block_type(state, b_all),
1777                                b_all->logical_bytenr, b_all->dev_state->name,
1778                                b_all->dev_bytenr, b_all->mirror_num,
1779                                l->ref_cnt,
1780                                btrfsic_get_block_type(state, l->block_ref_to),
1781                                l->block_ref_to->logical_bytenr,
1782                                l->block_ref_to->dev_state->name,
1783                                l->block_ref_to->dev_bytenr,
1784                                l->block_ref_to->mirror_num);
1785                 }
1786
1787                 list_for_each(elem_ref_from, &b_all->ref_from_list) {
1788                         const struct btrfsic_block_link *const l =
1789                             list_entry(elem_ref_from,
1790                                        struct btrfsic_block_link,
1791                                        node_ref_from);
1792
1793                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1794                                " is ref %u* from"
1795                                " %c @%llu (%s/%llu/%d)\n",
1796                                btrfsic_get_block_type(state, b_all),
1797                                b_all->logical_bytenr, b_all->dev_state->name,
1798                                b_all->dev_bytenr, b_all->mirror_num,
1799                                l->ref_cnt,
1800                                btrfsic_get_block_type(state, l->block_ref_from),
1801                                l->block_ref_from->logical_bytenr,
1802                                l->block_ref_from->dev_state->name,
1803                                l->block_ref_from->dev_bytenr,
1804                                l->block_ref_from->mirror_num);
1805                 }
1806
1807                 printk(KERN_INFO "\n");
1808         }
1809 }
1810
1811 /*
1812  * Test whether the disk block contains a tree block (leaf or node)
1813  * (note that this test fails for the super block)
1814  */
1815 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1816                                      char **datav, unsigned int num_pages)
1817 {
1818         struct btrfs_header *h;
1819         u8 csum[BTRFS_CSUM_SIZE];
1820         u32 crc = ~(u32)0;
1821         unsigned int i;
1822
1823         if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1824                 return 1; /* not metadata */
1825         num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1826         h = (struct btrfs_header *)datav[0];
1827
1828         if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
1829                 return 1;
1830
1831         for (i = 0; i < num_pages; i++) {
1832                 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1833                 size_t sublen = i ? PAGE_CACHE_SIZE :
1834                                     (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1835
1836                 crc = crc32c(crc, data, sublen);
1837         }
1838         btrfs_csum_final(crc, csum);
1839         if (memcmp(csum, h->csum, state->csum_size))
1840                 return 1;
1841
1842         return 0; /* is metadata */
1843 }
1844
1845 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1846                                           u64 dev_bytenr, char **mapped_datav,
1847                                           unsigned int num_pages,
1848                                           struct bio *bio, int *bio_is_patched,
1849                                           struct buffer_head *bh,
1850                                           int submit_bio_bh_rw)
1851 {
1852         int is_metadata;
1853         struct btrfsic_block *block;
1854         struct btrfsic_block_data_ctx block_ctx;
1855         int ret;
1856         struct btrfsic_state *state = dev_state->state;
1857         struct block_device *bdev = dev_state->bdev;
1858         unsigned int processed_len;
1859
1860         if (NULL != bio_is_patched)
1861                 *bio_is_patched = 0;
1862
1863 again:
1864         if (num_pages == 0)
1865                 return;
1866
1867         processed_len = 0;
1868         is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1869                                                       num_pages));
1870
1871         block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1872                                                &state->block_hashtable);
1873         if (NULL != block) {
1874                 u64 bytenr = 0;
1875                 struct list_head *elem_ref_to;
1876                 struct list_head *tmp_ref_to;
1877
1878                 if (block->is_superblock) {
1879                         bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
1880                                                     mapped_datav[0]);
1881                         if (num_pages * PAGE_CACHE_SIZE <
1882                             BTRFS_SUPER_INFO_SIZE) {
1883                                 printk(KERN_INFO
1884                                        "btrfsic: cannot work with too short bios!\n");
1885                                 return;
1886                         }
1887                         is_metadata = 1;
1888                         BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
1889                         processed_len = BTRFS_SUPER_INFO_SIZE;
1890                         if (state->print_mask &
1891                             BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1892                                 printk(KERN_INFO
1893                                        "[before new superblock is written]:\n");
1894                                 btrfsic_dump_tree_sub(state, block, 0);
1895                         }
1896                 }
1897                 if (is_metadata) {
1898                         if (!block->is_superblock) {
1899                                 if (num_pages * PAGE_CACHE_SIZE <
1900                                     state->metablock_size) {
1901                                         printk(KERN_INFO
1902                                                "btrfsic: cannot work with too short bios!\n");
1903                                         return;
1904                                 }
1905                                 processed_len = state->metablock_size;
1906                                 bytenr = btrfs_stack_header_bytenr(
1907                                                 (struct btrfs_header *)
1908                                                 mapped_datav[0]);
1909                                 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1910                                                                dev_state,
1911                                                                dev_bytenr);
1912                         }
1913                         if (block->logical_bytenr != bytenr &&
1914                             !(!block->is_metadata &&
1915                               block->logical_bytenr == 0))
1916                                 printk(KERN_INFO
1917                                        "Written block @%llu (%s/%llu/%d)"
1918                                        " found in hash table, %c,"
1919                                        " bytenr mismatch"
1920                                        " (!= stored %llu).\n",
1921                                        bytenr, dev_state->name, dev_bytenr,
1922                                        block->mirror_num,
1923                                        btrfsic_get_block_type(state, block),
1924                                        block->logical_bytenr);
1925                         else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1926                                 printk(KERN_INFO
1927                                        "Written block @%llu (%s/%llu/%d)"
1928                                        " found in hash table, %c.\n",
1929                                        bytenr, dev_state->name, dev_bytenr,
1930                                        block->mirror_num,
1931                                        btrfsic_get_block_type(state, block));
1932                         block->logical_bytenr = bytenr;
1933                 } else {
1934                         if (num_pages * PAGE_CACHE_SIZE <
1935                             state->datablock_size) {
1936                                 printk(KERN_INFO
1937                                        "btrfsic: cannot work with too short bios!\n");
1938                                 return;
1939                         }
1940                         processed_len = state->datablock_size;
1941                         bytenr = block->logical_bytenr;
1942                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1943                                 printk(KERN_INFO
1944                                        "Written block @%llu (%s/%llu/%d)"
1945                                        " found in hash table, %c.\n",
1946                                        bytenr, dev_state->name, dev_bytenr,
1947                                        block->mirror_num,
1948                                        btrfsic_get_block_type(state, block));
1949                 }
1950
1951                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1952                         printk(KERN_INFO
1953                                "ref_to_list: %cE, ref_from_list: %cE\n",
1954                                list_empty(&block->ref_to_list) ? ' ' : '!',
1955                                list_empty(&block->ref_from_list) ? ' ' : '!');
1956                 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
1957                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1958                                " @%llu (%s/%llu/%d), old(gen=%llu,"
1959                                " objectid=%llu, type=%d, offset=%llu),"
1960                                " new(gen=%llu),"
1961                                " which is referenced by most recent superblock"
1962                                " (superblockgen=%llu)!\n",
1963                                btrfsic_get_block_type(state, block), bytenr,
1964                                dev_state->name, dev_bytenr, block->mirror_num,
1965                                block->generation,
1966                                btrfs_disk_key_objectid(&block->disk_key),
1967                                block->disk_key.type,
1968                                btrfs_disk_key_offset(&block->disk_key),
1969                                btrfs_stack_header_generation(
1970                                        (struct btrfs_header *) mapped_datav[0]),
1971                                state->max_superblock_generation);
1972                         btrfsic_dump_tree(state);
1973                 }
1974
1975                 if (!block->is_iodone && !block->never_written) {
1976                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1977                                " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
1978                                " which is not yet iodone!\n",
1979                                btrfsic_get_block_type(state, block), bytenr,
1980                                dev_state->name, dev_bytenr, block->mirror_num,
1981                                block->generation,
1982                                btrfs_stack_header_generation(
1983                                        (struct btrfs_header *)
1984                                        mapped_datav[0]));
1985                         /* it would not be safe to go on */
1986                         btrfsic_dump_tree(state);
1987                         goto continue_loop;
1988                 }
1989
1990                 /*
1991                  * Clear all references of this block. Do not free
1992                  * the block itself even if it is not referenced anymore
1993                  * because it still carries valuable information
1994                  * like whether it was ever written and IO completed.
1995                  */
1996                 list_for_each_safe(elem_ref_to, tmp_ref_to,
1997                                    &block->ref_to_list) {
1998                         struct btrfsic_block_link *const l =
1999                             list_entry(elem_ref_to,
2000                                        struct btrfsic_block_link,
2001                                        node_ref_to);
2002
2003                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2004                                 btrfsic_print_rem_link(state, l);
2005                         l->ref_cnt--;
2006                         if (0 == l->ref_cnt) {
2007                                 list_del(&l->node_ref_to);
2008                                 list_del(&l->node_ref_from);
2009                                 btrfsic_block_link_hashtable_remove(l);
2010                                 btrfsic_block_link_free(l);
2011                         }
2012                 }
2013
2014                 if (block->is_superblock)
2015                         ret = btrfsic_map_superblock(state, bytenr,
2016                                                      processed_len,
2017                                                      bdev, &block_ctx);
2018                 else
2019                         ret = btrfsic_map_block(state, bytenr, processed_len,
2020                                                 &block_ctx, 0);
2021                 if (ret) {
2022                         printk(KERN_INFO
2023                                "btrfsic: btrfsic_map_block(root @%llu)"
2024                                " failed!\n", bytenr);
2025                         goto continue_loop;
2026                 }
2027                 block_ctx.datav = mapped_datav;
2028                 /* the following is required in case of writes to mirrors,
2029                  * use the same that was used for the lookup */
2030                 block_ctx.dev = dev_state;
2031                 block_ctx.dev_bytenr = dev_bytenr;
2032
2033                 if (is_metadata || state->include_extent_data) {
2034                         block->never_written = 0;
2035                         block->iodone_w_error = 0;
2036                         if (NULL != bio) {
2037                                 block->is_iodone = 0;
2038                                 BUG_ON(NULL == bio_is_patched);
2039                                 if (!*bio_is_patched) {
2040                                         block->orig_bio_bh_private =
2041                                             bio->bi_private;
2042                                         block->orig_bio_bh_end_io.bio =
2043                                             bio->bi_end_io;
2044                                         block->next_in_same_bio = NULL;
2045                                         bio->bi_private = block;
2046                                         bio->bi_end_io = btrfsic_bio_end_io;
2047                                         *bio_is_patched = 1;
2048                                 } else {
2049                                         struct btrfsic_block *chained_block =
2050                                             (struct btrfsic_block *)
2051                                             bio->bi_private;
2052
2053                                         BUG_ON(NULL == chained_block);
2054                                         block->orig_bio_bh_private =
2055                                             chained_block->orig_bio_bh_private;
2056                                         block->orig_bio_bh_end_io.bio =
2057                                             chained_block->orig_bio_bh_end_io.
2058                                             bio;
2059                                         block->next_in_same_bio = chained_block;
2060                                         bio->bi_private = block;
2061                                 }
2062                         } else if (NULL != bh) {
2063                                 block->is_iodone = 0;
2064                                 block->orig_bio_bh_private = bh->b_private;
2065                                 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2066                                 block->next_in_same_bio = NULL;
2067                                 bh->b_private = block;
2068                                 bh->b_end_io = btrfsic_bh_end_io;
2069                         } else {
2070                                 block->is_iodone = 1;
2071                                 block->orig_bio_bh_private = NULL;
2072                                 block->orig_bio_bh_end_io.bio = NULL;
2073                                 block->next_in_same_bio = NULL;
2074                         }
2075                 }
2076
2077                 block->flush_gen = dev_state->last_flush_gen + 1;
2078                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2079                 if (is_metadata) {
2080                         block->logical_bytenr = bytenr;
2081                         block->is_metadata = 1;
2082                         if (block->is_superblock) {
2083                                 BUG_ON(PAGE_CACHE_SIZE !=
2084                                        BTRFS_SUPER_INFO_SIZE);
2085                                 ret = btrfsic_process_written_superblock(
2086                                                 state,
2087                                                 block,
2088                                                 (struct btrfs_super_block *)
2089                                                 mapped_datav[0]);
2090                                 if (state->print_mask &
2091                                     BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
2092                                         printk(KERN_INFO
2093                                         "[after new superblock is written]:\n");
2094                                         btrfsic_dump_tree_sub(state, block, 0);
2095                                 }
2096                         } else {
2097                                 block->mirror_num = 0;  /* unknown */
2098                                 ret = btrfsic_process_metablock(
2099                                                 state,
2100                                                 block,
2101                                                 &block_ctx,
2102                                                 0, 0);
2103                         }
2104                         if (ret)
2105                                 printk(KERN_INFO
2106                                        "btrfsic: btrfsic_process_metablock"
2107                                        "(root @%llu) failed!\n",
2108                                        dev_bytenr);
2109                 } else {
2110                         block->is_metadata = 0;
2111                         block->mirror_num = 0;  /* unknown */
2112                         block->generation = BTRFSIC_GENERATION_UNKNOWN;
2113                         if (!state->include_extent_data
2114                             && list_empty(&block->ref_from_list)) {
2115                                 /*
2116                                  * disk block is overwritten with extent
2117                                  * data (not meta data) and we are configured
2118                                  * to not include extent data: take the
2119                                  * chance and free the block's memory
2120                                  */
2121                                 btrfsic_block_hashtable_remove(block);
2122                                 list_del(&block->all_blocks_node);
2123                                 btrfsic_block_free(block);
2124                         }
2125                 }
2126                 btrfsic_release_block_ctx(&block_ctx);
2127         } else {
2128                 /* block has not been found in hash table */
2129                 u64 bytenr;
2130
2131                 if (!is_metadata) {
2132                         processed_len = state->datablock_size;
2133                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2134                                 printk(KERN_INFO "Written block (%s/%llu/?)"
2135                                        " !found in hash table, D.\n",
2136                                        dev_state->name, dev_bytenr);
2137                         if (!state->include_extent_data) {
2138                                 /* ignore that written D block */
2139                                 goto continue_loop;
2140                         }
2141
2142                         /* this is getting ugly for the
2143                          * include_extent_data case... */
2144                         bytenr = 0;     /* unknown */
2145                         block_ctx.start = bytenr;
2146                         block_ctx.len = processed_len;
2147                         block_ctx.mem_to_free = NULL;
2148                         block_ctx.pagev = NULL;
2149                 } else {
2150                         processed_len = state->metablock_size;
2151                         bytenr = btrfs_stack_header_bytenr(
2152                                         (struct btrfs_header *)
2153                                         mapped_datav[0]);
2154                         btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
2155                                                        dev_bytenr);
2156                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2157                                 printk(KERN_INFO
2158                                        "Written block @%llu (%s/%llu/?)"
2159                                        " !found in hash table, M.\n",
2160                                        bytenr, dev_state->name, dev_bytenr);
2161
2162                         ret = btrfsic_map_block(state, bytenr, processed_len,
2163                                                 &block_ctx, 0);
2164                         if (ret) {
2165                                 printk(KERN_INFO
2166                                        "btrfsic: btrfsic_map_block(root @%llu)"
2167                                        " failed!\n",
2168                                        dev_bytenr);
2169                                 goto continue_loop;
2170                         }
2171                 }
2172                 block_ctx.datav = mapped_datav;
2173                 /* the following is required in case of writes to mirrors,
2174                  * use the same that was used for the lookup */
2175                 block_ctx.dev = dev_state;
2176                 block_ctx.dev_bytenr = dev_bytenr;
2177
2178                 block = btrfsic_block_alloc();
2179                 if (NULL == block) {
2180                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2181                         btrfsic_release_block_ctx(&block_ctx);
2182                         goto continue_loop;
2183                 }
2184                 block->dev_state = dev_state;
2185                 block->dev_bytenr = dev_bytenr;
2186                 block->logical_bytenr = bytenr;
2187                 block->is_metadata = is_metadata;
2188                 block->never_written = 0;
2189                 block->iodone_w_error = 0;
2190                 block->mirror_num = 0;  /* unknown */
2191                 block->flush_gen = dev_state->last_flush_gen + 1;
2192                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2193                 if (NULL != bio) {
2194                         block->is_iodone = 0;
2195                         BUG_ON(NULL == bio_is_patched);
2196                         if (!*bio_is_patched) {
2197                                 block->orig_bio_bh_private = bio->bi_private;
2198                                 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
2199                                 block->next_in_same_bio = NULL;
2200                                 bio->bi_private = block;
2201                                 bio->bi_end_io = btrfsic_bio_end_io;
2202                                 *bio_is_patched = 1;
2203                         } else {
2204                                 struct btrfsic_block *chained_block =
2205                                     (struct btrfsic_block *)
2206                                     bio->bi_private;
2207
2208                                 BUG_ON(NULL == chained_block);
2209                                 block->orig_bio_bh_private =
2210                                     chained_block->orig_bio_bh_private;
2211                                 block->orig_bio_bh_end_io.bio =
2212                                     chained_block->orig_bio_bh_end_io.bio;
2213                                 block->next_in_same_bio = chained_block;
2214                                 bio->bi_private = block;
2215                         }
2216                 } else if (NULL != bh) {
2217                         block->is_iodone = 0;
2218                         block->orig_bio_bh_private = bh->b_private;
2219                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
2220                         block->next_in_same_bio = NULL;
2221                         bh->b_private = block;
2222                         bh->b_end_io = btrfsic_bh_end_io;
2223                 } else {
2224                         block->is_iodone = 1;
2225                         block->orig_bio_bh_private = NULL;
2226                         block->orig_bio_bh_end_io.bio = NULL;
2227                         block->next_in_same_bio = NULL;
2228                 }
2229                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2230                         printk(KERN_INFO
2231                                "New written %c-block @%llu (%s/%llu/%d)\n",
2232                                is_metadata ? 'M' : 'D',
2233                                block->logical_bytenr, block->dev_state->name,
2234                                block->dev_bytenr, block->mirror_num);
2235                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2236                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2237
2238                 if (is_metadata) {
2239                         ret = btrfsic_process_metablock(state, block,
2240                                                         &block_ctx, 0, 0);
2241                         if (ret)
2242                                 printk(KERN_INFO
2243                                        "btrfsic: process_metablock(root @%llu)"
2244                                        " failed!\n",
2245                                        dev_bytenr);
2246                 }
2247                 btrfsic_release_block_ctx(&block_ctx);
2248         }
2249
2250 continue_loop:
2251         BUG_ON(!processed_len);
2252         dev_bytenr += processed_len;
2253         mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
2254         num_pages -= processed_len >> PAGE_CACHE_SHIFT;
2255         goto again;
2256 }
2257
2258 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
2259 {
2260         struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
2261         int iodone_w_error;
2262
2263         /* mutex is not held! This is not save if IO is not yet completed
2264          * on umount */
2265         iodone_w_error = 0;
2266         if (bio_error_status)
2267                 iodone_w_error = 1;
2268
2269         BUG_ON(NULL == block);
2270         bp->bi_private = block->orig_bio_bh_private;
2271         bp->bi_end_io = block->orig_bio_bh_end_io.bio;
2272
2273         do {
2274                 struct btrfsic_block *next_block;
2275                 struct btrfsic_dev_state *const dev_state = block->dev_state;
2276
2277                 if ((dev_state->state->print_mask &
2278                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2279                         printk(KERN_INFO
2280                                "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2281                                bio_error_status,
2282                                btrfsic_get_block_type(dev_state->state, block),
2283                                block->logical_bytenr, dev_state->name,
2284                                block->dev_bytenr, block->mirror_num);
2285                 next_block = block->next_in_same_bio;
2286                 block->iodone_w_error = iodone_w_error;
2287                 if (block->submit_bio_bh_rw & REQ_FLUSH) {
2288                         dev_state->last_flush_gen++;
2289                         if ((dev_state->state->print_mask &
2290                              BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2291                                 printk(KERN_INFO
2292                                        "bio_end_io() new %s flush_gen=%llu\n",
2293                                        dev_state->name,
2294                                        dev_state->last_flush_gen);
2295                 }
2296                 if (block->submit_bio_bh_rw & REQ_FUA)
2297                         block->flush_gen = 0; /* FUA completed means block is
2298                                                * on disk */
2299                 block->is_iodone = 1; /* for FLUSH, this releases the block */
2300                 block = next_block;
2301         } while (NULL != block);
2302
2303         bp->bi_end_io(bp, bio_error_status);
2304 }
2305
2306 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2307 {
2308         struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
2309         int iodone_w_error = !uptodate;
2310         struct btrfsic_dev_state *dev_state;
2311
2312         BUG_ON(NULL == block);
2313         dev_state = block->dev_state;
2314         if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2315                 printk(KERN_INFO
2316                        "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
2317                        iodone_w_error,
2318                        btrfsic_get_block_type(dev_state->state, block),
2319                        block->logical_bytenr, block->dev_state->name,
2320                        block->dev_bytenr, block->mirror_num);
2321
2322         block->iodone_w_error = iodone_w_error;
2323         if (block->submit_bio_bh_rw & REQ_FLUSH) {
2324                 dev_state->last_flush_gen++;
2325                 if ((dev_state->state->print_mask &
2326                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2327                         printk(KERN_INFO
2328                                "bh_end_io() new %s flush_gen=%llu\n",
2329                                dev_state->name, dev_state->last_flush_gen);
2330         }
2331         if (block->submit_bio_bh_rw & REQ_FUA)
2332                 block->flush_gen = 0; /* FUA completed means block is on disk */
2333
2334         bh->b_private = block->orig_bio_bh_private;
2335         bh->b_end_io = block->orig_bio_bh_end_io.bh;
2336         block->is_iodone = 1; /* for FLUSH, this releases the block */
2337         bh->b_end_io(bh, uptodate);
2338 }
2339
2340 static int btrfsic_process_written_superblock(
2341                 struct btrfsic_state *state,
2342                 struct btrfsic_block *const superblock,
2343                 struct btrfs_super_block *const super_hdr)
2344 {
2345         int pass;
2346
2347         superblock->generation = btrfs_super_generation(super_hdr);
2348         if (!(superblock->generation > state->max_superblock_generation ||
2349               0 == state->max_superblock_generation)) {
2350                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2351                         printk(KERN_INFO
2352                                "btrfsic: superblock @%llu (%s/%llu/%d)"
2353                                " with old gen %llu <= %llu\n",
2354                                superblock->logical_bytenr,
2355                                superblock->dev_state->name,
2356                                superblock->dev_bytenr, superblock->mirror_num,
2357                                btrfs_super_generation(super_hdr),
2358                                state->max_superblock_generation);
2359         } else {
2360                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2361                         printk(KERN_INFO
2362                                "btrfsic: got new superblock @%llu (%s/%llu/%d)"
2363                                " with new gen %llu > %llu\n",
2364                                superblock->logical_bytenr,
2365                                superblock->dev_state->name,
2366                                superblock->dev_bytenr, superblock->mirror_num,
2367                                btrfs_super_generation(super_hdr),
2368                                state->max_superblock_generation);
2369
2370                 state->max_superblock_generation =
2371                     btrfs_super_generation(super_hdr);
2372                 state->latest_superblock = superblock;
2373         }
2374
2375         for (pass = 0; pass < 3; pass++) {
2376                 int ret;
2377                 u64 next_bytenr;
2378                 struct btrfsic_block *next_block;
2379                 struct btrfsic_block_data_ctx tmp_next_block_ctx;
2380                 struct btrfsic_block_link *l;
2381                 int num_copies;
2382                 int mirror_num;
2383                 const char *additional_string = NULL;
2384                 struct btrfs_disk_key tmp_disk_key = {0};
2385
2386                 btrfs_set_disk_key_objectid(&tmp_disk_key,
2387                                             BTRFS_ROOT_ITEM_KEY);
2388                 btrfs_set_disk_key_objectid(&tmp_disk_key, 0);
2389
2390                 switch (pass) {
2391                 case 0:
2392                         btrfs_set_disk_key_objectid(&tmp_disk_key,
2393                                                     BTRFS_ROOT_TREE_OBJECTID);
2394                         additional_string = "root ";
2395                         next_bytenr = btrfs_super_root(super_hdr);
2396                         if (state->print_mask &
2397                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2398                                 printk(KERN_INFO "root@%llu\n", next_bytenr);
2399                         break;
2400                 case 1:
2401                         btrfs_set_disk_key_objectid(&tmp_disk_key,
2402                                                     BTRFS_CHUNK_TREE_OBJECTID);
2403                         additional_string = "chunk ";
2404                         next_bytenr = btrfs_super_chunk_root(super_hdr);
2405                         if (state->print_mask &
2406                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2407                                 printk(KERN_INFO "chunk@%llu\n", next_bytenr);
2408                         break;
2409                 case 2:
2410                         btrfs_set_disk_key_objectid(&tmp_disk_key,
2411                                                     BTRFS_TREE_LOG_OBJECTID);
2412                         additional_string = "log ";
2413                         next_bytenr = btrfs_super_log_root(super_hdr);
2414                         if (0 == next_bytenr)
2415                                 continue;
2416                         if (state->print_mask &
2417                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2418                                 printk(KERN_INFO "log@%llu\n", next_bytenr);
2419                         break;
2420                 }
2421
2422                 num_copies =
2423                     btrfs_num_copies(state->root->fs_info,
2424                                      next_bytenr, BTRFS_SUPER_INFO_SIZE);
2425                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2426                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2427                                next_bytenr, num_copies);
2428                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2429                         int was_created;
2430
2431                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2432                                 printk(KERN_INFO
2433                                        "btrfsic_process_written_superblock("
2434                                        "mirror_num=%d)\n", mirror_num);
2435                         ret = btrfsic_map_block(state, next_bytenr,
2436                                                 BTRFS_SUPER_INFO_SIZE,
2437                                                 &tmp_next_block_ctx,
2438                                                 mirror_num);
2439                         if (ret) {
2440                                 printk(KERN_INFO
2441                                        "btrfsic: btrfsic_map_block(@%llu,"
2442                                        " mirror=%d) failed!\n",
2443                                        next_bytenr, mirror_num);
2444                                 return -1;
2445                         }
2446
2447                         next_block = btrfsic_block_lookup_or_add(
2448                                         state,
2449                                         &tmp_next_block_ctx,
2450                                         additional_string,
2451                                         1, 0, 1,
2452                                         mirror_num,
2453                                         &was_created);
2454                         if (NULL == next_block) {
2455                                 printk(KERN_INFO
2456                                        "btrfsic: error, kmalloc failed!\n");
2457                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2458                                 return -1;
2459                         }
2460
2461                         next_block->disk_key = tmp_disk_key;
2462                         if (was_created)
2463                                 next_block->generation =
2464                                     BTRFSIC_GENERATION_UNKNOWN;
2465                         l = btrfsic_block_link_lookup_or_add(
2466                                         state,
2467                                         &tmp_next_block_ctx,
2468                                         next_block,
2469                                         superblock,
2470                                         BTRFSIC_GENERATION_UNKNOWN);
2471                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
2472                         if (NULL == l)
2473                                 return -1;
2474                 }
2475         }
2476
2477         if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)))
2478                 btrfsic_dump_tree(state);
2479
2480         return 0;
2481 }
2482
2483 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2484                                         struct btrfsic_block *const block,
2485                                         int recursion_level)
2486 {
2487         struct list_head *elem_ref_to;
2488         int ret = 0;
2489
2490         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2491                 /*
2492                  * Note that this situation can happen and does not
2493                  * indicate an error in regular cases. It happens
2494                  * when disk blocks are freed and later reused.
2495                  * The check-integrity module is not aware of any
2496                  * block free operations, it just recognizes block
2497                  * write operations. Therefore it keeps the linkage
2498                  * information for a block until a block is
2499                  * rewritten. This can temporarily cause incorrect
2500                  * and even circular linkage informations. This
2501                  * causes no harm unless such blocks are referenced
2502                  * by the most recent super block.
2503                  */
2504                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2505                         printk(KERN_INFO
2506                                "btrfsic: abort cyclic linkage (case 1).\n");
2507
2508                 return ret;
2509         }
2510
2511         /*
2512          * This algorithm is recursive because the amount of used stack
2513          * space is very small and the max recursion depth is limited.
2514          */
2515         list_for_each(elem_ref_to, &block->ref_to_list) {
2516                 const struct btrfsic_block_link *const l =
2517                     list_entry(elem_ref_to, struct btrfsic_block_link,
2518                                node_ref_to);
2519
2520                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2521                         printk(KERN_INFO
2522                                "rl=%d, %c @%llu (%s/%llu/%d)"
2523                                " %u* refers to %c @%llu (%s/%llu/%d)\n",
2524                                recursion_level,
2525                                btrfsic_get_block_type(state, block),
2526                                block->logical_bytenr, block->dev_state->name,
2527                                block->dev_bytenr, block->mirror_num,
2528                                l->ref_cnt,
2529                                btrfsic_get_block_type(state, l->block_ref_to),
2530                                l->block_ref_to->logical_bytenr,
2531                                l->block_ref_to->dev_state->name,
2532                                l->block_ref_to->dev_bytenr,
2533                                l->block_ref_to->mirror_num);
2534                 if (l->block_ref_to->never_written) {
2535                         printk(KERN_INFO "btrfs: attempt to write superblock"
2536                                " which references block %c @%llu (%s/%llu/%d)"
2537                                " which is never written!\n",
2538                                btrfsic_get_block_type(state, l->block_ref_to),
2539                                l->block_ref_to->logical_bytenr,
2540                                l->block_ref_to->dev_state->name,
2541                                l->block_ref_to->dev_bytenr,
2542                                l->block_ref_to->mirror_num);
2543                         ret = -1;
2544                 } else if (!l->block_ref_to->is_iodone) {
2545                         printk(KERN_INFO "btrfs: attempt to write superblock"
2546                                " which references block %c @%llu (%s/%llu/%d)"
2547                                " which is not yet iodone!\n",
2548                                btrfsic_get_block_type(state, l->block_ref_to),
2549                                l->block_ref_to->logical_bytenr,
2550                                l->block_ref_to->dev_state->name,
2551                                l->block_ref_to->dev_bytenr,
2552                                l->block_ref_to->mirror_num);
2553                         ret = -1;
2554                 } else if (l->block_ref_to->iodone_w_error) {
2555                         printk(KERN_INFO "btrfs: attempt to write superblock"
2556                                " which references block %c @%llu (%s/%llu/%d)"
2557                                " which has write error!\n",
2558                                btrfsic_get_block_type(state, l->block_ref_to),
2559                                l->block_ref_to->logical_bytenr,
2560                                l->block_ref_to->dev_state->name,
2561                                l->block_ref_to->dev_bytenr,
2562                                l->block_ref_to->mirror_num);
2563                         ret = -1;
2564                 } else if (l->parent_generation !=
2565                            l->block_ref_to->generation &&
2566                            BTRFSIC_GENERATION_UNKNOWN !=
2567                            l->parent_generation &&
2568                            BTRFSIC_GENERATION_UNKNOWN !=
2569                            l->block_ref_to->generation) {
2570                         printk(KERN_INFO "btrfs: attempt to write superblock"
2571                                " which references block %c @%llu (%s/%llu/%d)"
2572                                " with generation %llu !="
2573                                " parent generation %llu!\n",
2574                                btrfsic_get_block_type(state, l->block_ref_to),
2575                                l->block_ref_to->logical_bytenr,
2576                                l->block_ref_to->dev_state->name,
2577                                l->block_ref_to->dev_bytenr,
2578                                l->block_ref_to->mirror_num,
2579                                l->block_ref_to->generation,
2580                                l->parent_generation);
2581                         ret = -1;
2582                 } else if (l->block_ref_to->flush_gen >
2583                            l->block_ref_to->dev_state->last_flush_gen) {
2584                         printk(KERN_INFO "btrfs: attempt to write superblock"
2585                                " which references block %c @%llu (%s/%llu/%d)"
2586                                " which is not flushed out of disk's write cache"
2587                                " (block flush_gen=%llu,"
2588                                " dev->flush_gen=%llu)!\n",
2589                                btrfsic_get_block_type(state, l->block_ref_to),
2590                                l->block_ref_to->logical_bytenr,
2591                                l->block_ref_to->dev_state->name,
2592                                l->block_ref_to->dev_bytenr,
2593                                l->block_ref_to->mirror_num, block->flush_gen,
2594                                l->block_ref_to->dev_state->last_flush_gen);
2595                         ret = -1;
2596                 } else if (-1 == btrfsic_check_all_ref_blocks(state,
2597                                                               l->block_ref_to,
2598                                                               recursion_level +
2599                                                               1)) {
2600                         ret = -1;
2601                 }
2602         }
2603
2604         return ret;
2605 }
2606
2607 static int btrfsic_is_block_ref_by_superblock(
2608                 const struct btrfsic_state *state,
2609                 const struct btrfsic_block *block,
2610                 int recursion_level)
2611 {
2612         struct list_head *elem_ref_from;
2613
2614         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2615                 /* refer to comment at "abort cyclic linkage (case 1)" */
2616                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2617                         printk(KERN_INFO
2618                                "btrfsic: abort cyclic linkage (case 2).\n");
2619
2620                 return 0;
2621         }
2622
2623         /*
2624          * This algorithm is recursive because the amount of used stack space
2625          * is very small and the max recursion depth is limited.
2626          */
2627         list_for_each(elem_ref_from, &block->ref_from_list) {
2628                 const struct btrfsic_block_link *const l =
2629                     list_entry(elem_ref_from, struct btrfsic_block_link,
2630                                node_ref_from);
2631
2632                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2633                         printk(KERN_INFO
2634                                "rl=%d, %c @%llu (%s/%llu/%d)"
2635                                " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2636                                recursion_level,
2637                                btrfsic_get_block_type(state, block),
2638                                block->logical_bytenr, block->dev_state->name,
2639                                block->dev_bytenr, block->mirror_num,
2640                                l->ref_cnt,
2641                                btrfsic_get_block_type(state, l->block_ref_from),
2642                                l->block_ref_from->logical_bytenr,
2643                                l->block_ref_from->dev_state->name,
2644                                l->block_ref_from->dev_bytenr,
2645                                l->block_ref_from->mirror_num);
2646                 if (l->block_ref_from->is_superblock &&
2647                     state->latest_superblock->dev_bytenr ==
2648                     l->block_ref_from->dev_bytenr &&
2649                     state->latest_superblock->dev_state->bdev ==
2650                     l->block_ref_from->dev_state->bdev)
2651                         return 1;
2652                 else if (btrfsic_is_block_ref_by_superblock(state,
2653                                                             l->block_ref_from,
2654                                                             recursion_level +
2655                                                             1))
2656                         return 1;
2657         }
2658
2659         return 0;
2660 }
2661
2662 static void btrfsic_print_add_link(const struct btrfsic_state *state,
2663                                    const struct btrfsic_block_link *l)
2664 {
2665         printk(KERN_INFO
2666                "Add %u* link from %c @%llu (%s/%llu/%d)"
2667                " to %c @%llu (%s/%llu/%d).\n",
2668                l->ref_cnt,
2669                btrfsic_get_block_type(state, l->block_ref_from),
2670                l->block_ref_from->logical_bytenr,
2671                l->block_ref_from->dev_state->name,
2672                l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2673                btrfsic_get_block_type(state, l->block_ref_to),
2674                l->block_ref_to->logical_bytenr,
2675                l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
2676                l->block_ref_to->mirror_num);
2677 }
2678
2679 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2680                                    const struct btrfsic_block_link *l)
2681 {
2682         printk(KERN_INFO
2683                "Rem %u* link from %c @%llu (%s/%llu/%d)"
2684                " to %c @%llu (%s/%llu/%d).\n",
2685                l->ref_cnt,
2686                btrfsic_get_block_type(state, l->block_ref_from),
2687                l->block_ref_from->logical_bytenr,
2688                l->block_ref_from->dev_state->name,
2689                l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2690                btrfsic_get_block_type(state, l->block_ref_to),
2691                l->block_ref_to->logical_bytenr,
2692                l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
2693                l->block_ref_to->mirror_num);
2694 }
2695
2696 static char btrfsic_get_block_type(const struct btrfsic_state *state,
2697                                    const struct btrfsic_block *block)
2698 {
2699         if (block->is_superblock &&
2700             state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2701             state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2702                 return 'S';
2703         else if (block->is_superblock)
2704                 return 's';
2705         else if (block->is_metadata)
2706                 return 'M';
2707         else
2708                 return 'D';
2709 }
2710
2711 static void btrfsic_dump_tree(const struct btrfsic_state *state)
2712 {
2713         btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
2714 }
2715
2716 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2717                                   const struct btrfsic_block *block,
2718                                   int indent_level)
2719 {
2720         struct list_head *elem_ref_to;
2721         int indent_add;
2722         static char buf[80];
2723         int cursor_position;
2724
2725         /*
2726          * Should better fill an on-stack buffer with a complete line and
2727          * dump it at once when it is time to print a newline character.
2728          */
2729
2730         /*
2731          * This algorithm is recursive because the amount of used stack space
2732          * is very small and the max recursion depth is limited.
2733          */
2734         indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2735                              btrfsic_get_block_type(state, block),
2736                              block->logical_bytenr, block->dev_state->name,
2737                              block->dev_bytenr, block->mirror_num);
2738         if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2739                 printk("[...]\n");
2740                 return;
2741         }
2742         printk(buf);
2743         indent_level += indent_add;
2744         if (list_empty(&block->ref_to_list)) {
2745                 printk("\n");
2746                 return;
2747         }
2748         if (block->mirror_num > 1 &&
2749             !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2750                 printk(" [...]\n");
2751                 return;
2752         }
2753
2754         cursor_position = indent_level;
2755         list_for_each(elem_ref_to, &block->ref_to_list) {
2756                 const struct btrfsic_block_link *const l =
2757                     list_entry(elem_ref_to, struct btrfsic_block_link,
2758                                node_ref_to);
2759
2760                 while (cursor_position < indent_level) {
2761                         printk(" ");
2762                         cursor_position++;
2763                 }
2764                 if (l->ref_cnt > 1)
2765                         indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2766                 else
2767                         indent_add = sprintf(buf, " --> ");
2768                 if (indent_level + indent_add >
2769                     BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2770                         printk("[...]\n");
2771                         cursor_position = 0;
2772                         continue;
2773                 }
2774
2775                 printk(buf);
2776
2777                 btrfsic_dump_tree_sub(state, l->block_ref_to,
2778                                       indent_level + indent_add);
2779                 cursor_position = 0;
2780         }
2781 }
2782
2783 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2784                 struct btrfsic_state *state,
2785                 struct btrfsic_block_data_ctx *next_block_ctx,
2786                 struct btrfsic_block *next_block,
2787                 struct btrfsic_block *from_block,
2788                 u64 parent_generation)
2789 {
2790         struct btrfsic_block_link *l;
2791
2792         l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2793                                                 next_block_ctx->dev_bytenr,
2794                                                 from_block->dev_state->bdev,
2795                                                 from_block->dev_bytenr,
2796                                                 &state->block_link_hashtable);
2797         if (NULL == l) {
2798                 l = btrfsic_block_link_alloc();
2799                 if (NULL == l) {
2800                         printk(KERN_INFO
2801                                "btrfsic: error, kmalloc" " failed!\n");
2802                         return NULL;
2803                 }
2804
2805                 l->block_ref_to = next_block;
2806                 l->block_ref_from = from_block;
2807                 l->ref_cnt = 1;
2808                 l->parent_generation = parent_generation;
2809
2810                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2811                         btrfsic_print_add_link(state, l);
2812
2813                 list_add(&l->node_ref_to, &from_block->ref_to_list);
2814                 list_add(&l->node_ref_from, &next_block->ref_from_list);
2815
2816                 btrfsic_block_link_hashtable_add(l,
2817                                                  &state->block_link_hashtable);
2818         } else {
2819                 l->ref_cnt++;
2820                 l->parent_generation = parent_generation;
2821                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2822                         btrfsic_print_add_link(state, l);
2823         }
2824
2825         return l;
2826 }
2827
2828 static struct btrfsic_block *btrfsic_block_lookup_or_add(
2829                 struct btrfsic_state *state,
2830                 struct btrfsic_block_data_ctx *block_ctx,
2831                 const char *additional_string,
2832                 int is_metadata,
2833                 int is_iodone,
2834                 int never_written,
2835                 int mirror_num,
2836                 int *was_created)
2837 {
2838         struct btrfsic_block *block;
2839
2840         block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2841                                                block_ctx->dev_bytenr,
2842                                                &state->block_hashtable);
2843         if (NULL == block) {
2844                 struct btrfsic_dev_state *dev_state;
2845
2846                 block = btrfsic_block_alloc();
2847                 if (NULL == block) {
2848                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2849                         return NULL;
2850                 }
2851                 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
2852                 if (NULL == dev_state) {
2853                         printk(KERN_INFO
2854                                "btrfsic: error, lookup dev_state failed!\n");
2855                         btrfsic_block_free(block);
2856                         return NULL;
2857                 }
2858                 block->dev_state = dev_state;
2859                 block->dev_bytenr = block_ctx->dev_bytenr;
2860                 block->logical_bytenr = block_ctx->start;
2861                 block->is_metadata = is_metadata;
2862                 block->is_iodone = is_iodone;
2863                 block->never_written = never_written;
2864                 block->mirror_num = mirror_num;
2865                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2866                         printk(KERN_INFO
2867                                "New %s%c-block @%llu (%s/%llu/%d)\n",
2868                                additional_string,
2869                                btrfsic_get_block_type(state, block),
2870                                block->logical_bytenr, dev_state->name,
2871                                block->dev_bytenr, mirror_num);
2872                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2873                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2874                 if (NULL != was_created)
2875                         *was_created = 1;
2876         } else {
2877                 if (NULL != was_created)
2878                         *was_created = 0;
2879         }
2880
2881         return block;
2882 }
2883
2884 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2885                                            u64 bytenr,
2886                                            struct btrfsic_dev_state *dev_state,
2887                                            u64 dev_bytenr)
2888 {
2889         int num_copies;
2890         int mirror_num;
2891         int ret;
2892         struct btrfsic_block_data_ctx block_ctx;
2893         int match = 0;
2894
2895         num_copies = btrfs_num_copies(state->root->fs_info,
2896                                       bytenr, state->metablock_size);
2897
2898         for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2899                 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
2900                                         &block_ctx, mirror_num);
2901                 if (ret) {
2902                         printk(KERN_INFO "btrfsic:"
2903                                " btrfsic_map_block(logical @%llu,"
2904                                " mirror %d) failed!\n",
2905                                bytenr, mirror_num);
2906                         continue;
2907                 }
2908
2909                 if (dev_state->bdev == block_ctx.dev->bdev &&
2910                     dev_bytenr == block_ctx.dev_bytenr) {
2911                         match++;
2912                         btrfsic_release_block_ctx(&block_ctx);
2913                         break;
2914                 }
2915                 btrfsic_release_block_ctx(&block_ctx);
2916         }
2917
2918         if (WARN_ON(!match)) {
2919                 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2920                        " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2921                        " phys_bytenr=%llu)!\n",
2922                        bytenr, dev_state->name, dev_bytenr);
2923                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2924                         ret = btrfsic_map_block(state, bytenr,
2925                                                 state->metablock_size,
2926                                                 &block_ctx, mirror_num);
2927                         if (ret)
2928                                 continue;
2929
2930                         printk(KERN_INFO "Read logical bytenr @%llu maps to"
2931                                " (%s/%llu/%d)\n",
2932                                bytenr, block_ctx.dev->name,
2933                                block_ctx.dev_bytenr, mirror_num);
2934                 }
2935         }
2936 }
2937
2938 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
2939                 struct block_device *bdev)
2940 {
2941         struct btrfsic_dev_state *ds;
2942
2943         ds = btrfsic_dev_state_hashtable_lookup(bdev,
2944                                                 &btrfsic_dev_state_hashtable);
2945         return ds;
2946 }
2947
2948 int btrfsic_submit_bh(int rw, struct buffer_head *bh)
2949 {
2950         struct btrfsic_dev_state *dev_state;
2951
2952         if (!btrfsic_is_initialized)
2953                 return submit_bh(rw, bh);
2954
2955         mutex_lock(&btrfsic_mutex);
2956         /* since btrfsic_submit_bh() might also be called before
2957          * btrfsic_mount(), this might return NULL */
2958         dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
2959
2960         /* Only called to write the superblock (incl. FLUSH/FUA) */
2961         if (NULL != dev_state &&
2962             (rw & WRITE) && bh->b_size > 0) {
2963                 u64 dev_bytenr;
2964
2965                 dev_bytenr = 4096 * bh->b_blocknr;
2966                 if (dev_state->state->print_mask &
2967                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2968                         printk(KERN_INFO
2969                                "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu),"
2970                                " size=%zu, data=%p, bdev=%p)\n",
2971                                rw, (unsigned long long)bh->b_blocknr,
2972                                dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
2973                 btrfsic_process_written_block(dev_state, dev_bytenr,
2974                                               &bh->b_data, 1, NULL,
2975                                               NULL, bh, rw);
2976         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
2977                 if (dev_state->state->print_mask &
2978                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2979                         printk(KERN_INFO
2980                                "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
2981                                rw, bh->b_bdev);
2982                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
2983                         if ((dev_state->state->print_mask &
2984                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
2985                               BTRFSIC_PRINT_MASK_VERBOSE)))
2986                                 printk(KERN_INFO
2987                                        "btrfsic_submit_bh(%s) with FLUSH"
2988                                        " but dummy block already in use"
2989                                        " (ignored)!\n",
2990                                        dev_state->name);
2991                 } else {
2992                         struct btrfsic_block *const block =
2993                                 &dev_state->dummy_block_for_bio_bh_flush;
2994
2995                         block->is_iodone = 0;
2996                         block->never_written = 0;
2997                         block->iodone_w_error = 0;
2998                         block->flush_gen = dev_state->last_flush_gen + 1;
2999                         block->submit_bio_bh_rw = rw;
3000                         block->orig_bio_bh_private = bh->b_private;
3001                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
3002                         block->next_in_same_bio = NULL;
3003                         bh->b_private = block;
3004                         bh->b_end_io = btrfsic_bh_end_io;
3005                 }
3006         }
3007         mutex_unlock(&btrfsic_mutex);
3008         return submit_bh(rw, bh);
3009 }
3010
3011 void btrfsic_submit_bio(int rw, struct bio *bio)
3012 {
3013         struct btrfsic_dev_state *dev_state;
3014
3015         if (!btrfsic_is_initialized) {
3016                 submit_bio(rw, bio);
3017                 return;
3018         }
3019
3020         mutex_lock(&btrfsic_mutex);
3021         /* since btrfsic_submit_bio() is also called before
3022          * btrfsic_mount(), this might return NULL */
3023         dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
3024         if (NULL != dev_state &&
3025             (rw & WRITE) && NULL != bio->bi_io_vec) {
3026                 unsigned int i;
3027                 u64 dev_bytenr;
3028                 u64 cur_bytenr;
3029                 int bio_is_patched;
3030                 char **mapped_datav;
3031
3032                 dev_bytenr = 512 * bio->bi_sector;
3033                 bio_is_patched = 0;
3034                 if (dev_state->state->print_mask &
3035                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3036                         printk(KERN_INFO
3037                                "submit_bio(rw=0x%x, bi_vcnt=%u,"
3038                                " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
3039                                rw, bio->bi_vcnt,
3040                                (unsigned long long)bio->bi_sector, dev_bytenr,
3041                                bio->bi_bdev);
3042
3043                 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
3044                                        GFP_NOFS);
3045                 if (!mapped_datav)
3046                         goto leave;
3047                 cur_bytenr = dev_bytenr;
3048                 for (i = 0; i < bio->bi_vcnt; i++) {
3049                         BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
3050                         mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
3051                         if (!mapped_datav[i]) {
3052                                 while (i > 0) {
3053                                         i--;
3054                                         kunmap(bio->bi_io_vec[i].bv_page);
3055                                 }
3056                                 kfree(mapped_datav);
3057                                 goto leave;
3058                         }
3059                         if (dev_state->state->print_mask &
3060                             BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
3061                                 printk(KERN_INFO
3062                                        "#%u: bytenr=%llu, len=%u, offset=%u\n",
3063                                        i, cur_bytenr, bio->bi_io_vec[i].bv_len,
3064                                        bio->bi_io_vec[i].bv_offset);
3065                         cur_bytenr += bio->bi_io_vec[i].bv_len;
3066                 }
3067                 btrfsic_process_written_block(dev_state, dev_bytenr,
3068                                               mapped_datav, bio->bi_vcnt,
3069                                               bio, &bio_is_patched,
3070                                               NULL, rw);
3071                 while (i > 0) {
3072                         i--;
3073                         kunmap(bio->bi_io_vec[i].bv_page);
3074                 }
3075                 kfree(mapped_datav);
3076         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
3077                 if (dev_state->state->print_mask &
3078                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3079                         printk(KERN_INFO
3080                                "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
3081                                rw, bio->bi_bdev);
3082                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
3083                         if ((dev_state->state->print_mask &
3084                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3085                               BTRFSIC_PRINT_MASK_VERBOSE)))
3086                                 printk(KERN_INFO
3087                                        "btrfsic_submit_bio(%s) with FLUSH"
3088                                        " but dummy block already in use"
3089                                        " (ignored)!\n",
3090                                        dev_state->name);
3091                 } else {
3092                         struct btrfsic_block *const block =
3093                                 &dev_state->dummy_block_for_bio_bh_flush;
3094
3095                         block->is_iodone = 0;
3096                         block->never_written = 0;
3097                         block->iodone_w_error = 0;
3098                         block->flush_gen = dev_state->last_flush_gen + 1;
3099                         block->submit_bio_bh_rw = rw;
3100                         block->orig_bio_bh_private = bio->bi_private;
3101                         block->orig_bio_bh_end_io.bio = bio->bi_end_io;
3102                         block->next_in_same_bio = NULL;
3103                         bio->bi_private = block;
3104                         bio->bi_end_io = btrfsic_bio_end_io;
3105                 }
3106         }
3107 leave:
3108         mutex_unlock(&btrfsic_mutex);
3109
3110         submit_bio(rw, bio);
3111 }
3112
3113 int btrfsic_mount(struct btrfs_root *root,
3114                   struct btrfs_fs_devices *fs_devices,
3115                   int including_extent_data, u32 print_mask)
3116 {
3117         int ret;
3118         struct btrfsic_state *state;
3119         struct list_head *dev_head = &fs_devices->devices;
3120         struct btrfs_device *device;
3121
3122         if (root->nodesize != root->leafsize) {
3123                 printk(KERN_INFO
3124                        "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
3125                        root->nodesize, root->leafsize);
3126                 return -1;
3127         }
3128         if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3129                 printk(KERN_INFO
3130                        "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3131                        root->nodesize, PAGE_CACHE_SIZE);
3132                 return -1;
3133         }
3134         if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3135                 printk(KERN_INFO
3136                        "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3137                        root->leafsize, PAGE_CACHE_SIZE);
3138                 return -1;
3139         }
3140         if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3141                 printk(KERN_INFO
3142                        "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3143                        root->sectorsize, PAGE_CACHE_SIZE);
3144                 return -1;
3145         }
3146         state = kzalloc(sizeof(*state), GFP_NOFS);
3147         if (NULL == state) {
3148                 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
3149                 return -1;
3150         }
3151
3152         if (!btrfsic_is_initialized) {
3153                 mutex_init(&btrfsic_mutex);
3154                 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
3155                 btrfsic_is_initialized = 1;
3156         }
3157         mutex_lock(&btrfsic_mutex);
3158         state->root = root;
3159         state->print_mask = print_mask;
3160         state->include_extent_data = including_extent_data;
3161         state->csum_size = 0;
3162         state->metablock_size = root->nodesize;
3163         state->datablock_size = root->sectorsize;
3164         INIT_LIST_HEAD(&state->all_blocks_list);
3165         btrfsic_block_hashtable_init(&state->block_hashtable);
3166         btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
3167         state->max_superblock_generation = 0;
3168         state->latest_superblock = NULL;
3169
3170         list_for_each_entry(device, dev_head, dev_list) {
3171                 struct btrfsic_dev_state *ds;
3172                 char *p;
3173
3174                 if (!device->bdev || !device->name)
3175                         continue;
3176
3177                 ds = btrfsic_dev_state_alloc();
3178                 if (NULL == ds) {
3179                         printk(KERN_INFO
3180                                "btrfs check-integrity: kmalloc() failed!\n");
3181                         mutex_unlock(&btrfsic_mutex);
3182                         return -1;
3183                 }
3184                 ds->bdev = device->bdev;
3185                 ds->state = state;
3186                 bdevname(ds->bdev, ds->name);
3187                 ds->name[BDEVNAME_SIZE - 1] = '\0';
3188                 for (p = ds->name; *p != '\0'; p++);
3189                 while (p > ds->name && *p != '/')
3190                         p--;
3191                 if (*p == '/')
3192                         p++;
3193                 strlcpy(ds->name, p, sizeof(ds->name));
3194                 btrfsic_dev_state_hashtable_add(ds,
3195                                                 &btrfsic_dev_state_hashtable);
3196         }
3197
3198         ret = btrfsic_process_superblock(state, fs_devices);
3199         if (0 != ret) {
3200                 mutex_unlock(&btrfsic_mutex);
3201                 btrfsic_unmount(root, fs_devices);
3202                 return ret;
3203         }
3204
3205         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
3206                 btrfsic_dump_database(state);
3207         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
3208                 btrfsic_dump_tree(state);
3209
3210         mutex_unlock(&btrfsic_mutex);
3211         return 0;
3212 }
3213
3214 void btrfsic_unmount(struct btrfs_root *root,
3215                      struct btrfs_fs_devices *fs_devices)
3216 {
3217         struct list_head *elem_all;
3218         struct list_head *tmp_all;
3219         struct btrfsic_state *state;
3220         struct list_head *dev_head = &fs_devices->devices;
3221         struct btrfs_device *device;
3222
3223         if (!btrfsic_is_initialized)
3224                 return;
3225
3226         mutex_lock(&btrfsic_mutex);
3227
3228         state = NULL;
3229         list_for_each_entry(device, dev_head, dev_list) {
3230                 struct btrfsic_dev_state *ds;
3231
3232                 if (!device->bdev || !device->name)
3233                         continue;
3234
3235                 ds = btrfsic_dev_state_hashtable_lookup(
3236                                 device->bdev,
3237                                 &btrfsic_dev_state_hashtable);
3238                 if (NULL != ds) {
3239                         state = ds->state;
3240                         btrfsic_dev_state_hashtable_remove(ds);
3241                         btrfsic_dev_state_free(ds);
3242                 }
3243         }
3244
3245         if (NULL == state) {
3246                 printk(KERN_INFO
3247                        "btrfsic: error, cannot find state information"
3248                        " on umount!\n");
3249                 mutex_unlock(&btrfsic_mutex);
3250                 return;
3251         }
3252
3253         /*
3254          * Don't care about keeping the lists' state up to date,
3255          * just free all memory that was allocated dynamically.
3256          * Free the blocks and the block_links.
3257          */
3258         list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
3259                 struct btrfsic_block *const b_all =
3260                     list_entry(elem_all, struct btrfsic_block,
3261                                all_blocks_node);
3262                 struct list_head *elem_ref_to;
3263                 struct list_head *tmp_ref_to;
3264
3265                 list_for_each_safe(elem_ref_to, tmp_ref_to,
3266                                    &b_all->ref_to_list) {
3267                         struct btrfsic_block_link *const l =
3268                             list_entry(elem_ref_to,
3269                                        struct btrfsic_block_link,
3270                                        node_ref_to);
3271
3272                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
3273                                 btrfsic_print_rem_link(state, l);
3274
3275                         l->ref_cnt--;
3276                         if (0 == l->ref_cnt)
3277                                 btrfsic_block_link_free(l);
3278                 }
3279
3280                 if (b_all->is_iodone || b_all->never_written)
3281                         btrfsic_block_free(b_all);
3282                 else
3283                         printk(KERN_INFO "btrfs: attempt to free %c-block"
3284                                " @%llu (%s/%llu/%d) on umount which is"
3285                                " not yet iodone!\n",
3286                                btrfsic_get_block_type(state, b_all),
3287                                b_all->logical_bytenr, b_all->dev_state->name,
3288                                b_all->dev_bytenr, b_all->mirror_num);
3289         }
3290
3291         mutex_unlock(&btrfsic_mutex);
3292
3293         kfree(state);
3294 }