/* fs/btrfs/extent_io.c (as of "Btrfs: Change TestSetPageLocked() to trylock_page()") */
1 #include <linux/bitops.h>
2 #include <linux/slab.h>
3 #include <linux/bio.h>
4 #include <linux/mm.h>
5 #include <linux/gfp.h>
6 #include <linux/pagemap.h>
7 #include <linux/page-flags.h>
8 #include <linux/module.h>
9 #include <linux/spinlock.h>
10 #include <linux/blkdev.h>
11 #include <linux/swap.h>
12 #include <linux/version.h>
13 #include <linux/writeback.h>
14 #include <linux/pagevec.h>
15 #include "extent_io.h"
16 #include "extent_map.h"
17 #include "compat.h"
18
19 /* temporary define until extent_map moves out of btrfs */
20 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
21                                        unsigned long extra_flags,
22                                        void (*ctor)(void *, struct kmem_cache *,
23                                                     unsigned long));
24
25 static struct kmem_cache *extent_state_cache;
26 static struct kmem_cache *extent_buffer_cache;
27
28 static LIST_HEAD(buffers);
29 static LIST_HEAD(states);
30 static DEFINE_SPINLOCK(leak_lock);
31
32 #define BUFFER_LRU_MAX 64
33
34 struct tree_entry {
35         u64 start;
36         u64 end;
37         struct rb_node rb_node;
38 };
39
40 struct extent_page_data {
41         struct bio *bio;
42         struct extent_io_tree *tree;
43         get_extent_t *get_extent;
44 };
45
46 int __init extent_io_init(void)
47 {
48         extent_state_cache = btrfs_cache_create("extent_state",
49                                             sizeof(struct extent_state), 0,
50                                             NULL);
51         if (!extent_state_cache)
52                 return -ENOMEM;
53
54         extent_buffer_cache = btrfs_cache_create("extent_buffers",
55                                             sizeof(struct extent_buffer), 0,
56                                             NULL);
57         if (!extent_buffer_cache)
58                 goto free_state_cache;
59         return 0;
60
61 free_state_cache:
62         kmem_cache_destroy(extent_state_cache);
63         return -ENOMEM;
64 }
65
66 void extent_io_exit(void)
67 {
68         struct extent_state *state;
69         struct extent_buffer *eb;
70
71         while (!list_empty(&states)) {
72                 state = list_entry(states.next, struct extent_state, leak_list);
73                 printk("state leak: start %Lu end %Lu state %lu "
                          "in tree %p refs %d\n", state->start, state->end,
                          state->state, state->tree,
                          atomic_read(&state->refs));
74                 list_del(&state->leak_list);
75                 kmem_cache_free(extent_state_cache, state);
76
77         }
78
79         while (!list_empty(&buffers)) {
80                 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
81                 printk("buffer leak start %Lu len %lu refs %d\n",
                          eb->start, eb->len, atomic_read(&eb->refs));
82                 list_del(&eb->leak_list);
83                 kmem_cache_free(extent_buffer_cache, eb);
84         }
85         if (extent_state_cache)
86                 kmem_cache_destroy(extent_state_cache);
87         if (extent_buffer_cache)
88                 kmem_cache_destroy(extent_buffer_cache);
89 }
90
91 void extent_io_tree_init(struct extent_io_tree *tree,
92                           struct address_space *mapping, gfp_t mask)
93 {
94         tree->state.rb_node = NULL;
95         tree->buffer.rb_node = NULL;
96         tree->ops = NULL;
97         tree->dirty_bytes = 0;
98         spin_lock_init(&tree->lock);
99         spin_lock_init(&tree->buffer_lock);
100         tree->mapping = mapping;
101 }
102 EXPORT_SYMBOL(extent_io_tree_init);
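/*
 * Illustrative use only: a caller typically embeds an extent_io_tree in its
 * per-inode state and binds it to that inode's page cache mapping, e.g.
 *
 *	extent_io_tree_init(&io_tree, inode->i_mapping, GFP_NOFS);
 *
 * All of the set/clear/lock helpers below then operate on byte ranges of
 * that mapping.  The real call sites live outside this file.
 */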
103
104 struct extent_state *alloc_extent_state(gfp_t mask)
105 {
106         struct extent_state *state;
107         unsigned long flags;
108
109         state = kmem_cache_alloc(extent_state_cache, mask);
110         if (!state)
111                 return state;
112         state->state = 0;
113         state->private = 0;
114         state->tree = NULL;
115         spin_lock_irqsave(&leak_lock, flags);
116         list_add(&state->leak_list, &states);
117         spin_unlock_irqrestore(&leak_lock, flags);
118
119         atomic_set(&state->refs, 1);
120         init_waitqueue_head(&state->wq);
121         return state;
122 }
123 EXPORT_SYMBOL(alloc_extent_state);
124
125 void free_extent_state(struct extent_state *state)
126 {
127         if (!state)
128                 return;
129         if (atomic_dec_and_test(&state->refs)) {
130                 unsigned long flags;
131                 WARN_ON(state->tree);
132                 spin_lock_irqsave(&leak_lock, flags);
133                 list_del(&state->leak_list);
134                 spin_unlock_irqrestore(&leak_lock, flags);
135                 kmem_cache_free(extent_state_cache, state);
136         }
137 }
138 EXPORT_SYMBOL(free_extent_state);
139
140 static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
141                                    struct rb_node *node)
142 {
143         struct rb_node ** p = &root->rb_node;
144         struct rb_node * parent = NULL;
145         struct tree_entry *entry;
146
147         while(*p) {
148                 parent = *p;
149                 entry = rb_entry(parent, struct tree_entry, rb_node);
150
151                 if (offset < entry->start)
152                         p = &(*p)->rb_left;
153                 else if (offset > entry->end)
154                         p = &(*p)->rb_right;
155                 else
156                         return parent;
157         }
158
159         entry = rb_entry(node, struct tree_entry, rb_node);
160         rb_link_node(node, parent, p);
161         rb_insert_color(node, root);
162         return NULL;
163 }
164
165 static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
166                                      struct rb_node **prev_ret,
167                                      struct rb_node **next_ret)
168 {
169         struct rb_root *root = &tree->state;
170         struct rb_node * n = root->rb_node;
171         struct rb_node *prev = NULL;
172         struct rb_node *orig_prev = NULL;
173         struct tree_entry *entry;
174         struct tree_entry *prev_entry = NULL;
175
176         while(n) {
177                 entry = rb_entry(n, struct tree_entry, rb_node);
178                 prev = n;
179                 prev_entry = entry;
180
181                 if (offset < entry->start)
182                         n = n->rb_left;
183                 else if (offset > entry->end)
184                         n = n->rb_right;
185                 else {
186                         return n;
187                 }
188         }
189
190         if (prev_ret) {
191                 orig_prev = prev;
192                 while(prev && offset > prev_entry->end) {
193                         prev = rb_next(prev);
194                         prev_entry = rb_entry(prev, struct tree_entry, rb_node);
195                 }
196                 *prev_ret = prev;
197                 prev = orig_prev;
198         }
199
200         if (next_ret) {
201                 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
202                 while(prev && offset < prev_entry->start) {
203                         prev = rb_prev(prev);
204                         prev_entry = rb_entry(prev, struct tree_entry, rb_node);
205                 }
206                 *next_ret = prev;
207         }
208         return NULL;
209 }
210
211 static inline struct rb_node *tree_search(struct extent_io_tree *tree,
212                                           u64 offset)
213 {
214         struct rb_node *prev = NULL;
215         struct rb_node *ret;
216
217         ret = __etree_search(tree, offset, &prev, NULL);
218         if (!ret) {
219                 return prev;
220         }
221         return ret;
222 }
223
224 static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
225                                           u64 offset, struct rb_node *node)
226 {
227         struct rb_root *root = &tree->buffer;
228         struct rb_node ** p = &root->rb_node;
229         struct rb_node * parent = NULL;
230         struct extent_buffer *eb;
231
232         while(*p) {
233                 parent = *p;
234                 eb = rb_entry(parent, struct extent_buffer, rb_node);
235
236                 if (offset < eb->start)
237                         p = &(*p)->rb_left;
238                 else if (offset > eb->start)
239                         p = &(*p)->rb_right;
240                 else
241                         return eb;
242         }
243
244         rb_link_node(node, parent, p);
245         rb_insert_color(node, root);
246         return NULL;
247 }
248
249 static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
250                                            u64 offset)
251 {
252         struct rb_root *root = &tree->buffer;
253         struct rb_node * n = root->rb_node;
254         struct extent_buffer *eb;
255
256         while(n) {
257                 eb = rb_entry(n, struct extent_buffer, rb_node);
258                 if (offset < eb->start)
259                         n = n->rb_left;
260                 else if (offset > eb->start)
261                         n = n->rb_right;
262                 else
263                         return eb;
264         }
265         return NULL;
266 }
267
268 /*
269  * utility function to look for merge candidates inside a given range.
270  * Any extents with matching state are merged together into a single
271  * extent in the tree.  Extents with EXTENT_IOBITS set in their state field
272  * are not merged because the end_io handlers need to be able to do
273  * operations on them without sleeping (or doing allocations/splits).
274  *
275  * This should be called with the tree lock held.
276  */
277 static int merge_state(struct extent_io_tree *tree,
278                        struct extent_state *state)
279 {
280         struct extent_state *other;
281         struct rb_node *other_node;
282
283         if (state->state & EXTENT_IOBITS)
284                 return 0;
285
286         other_node = rb_prev(&state->rb_node);
287         if (other_node) {
288                 other = rb_entry(other_node, struct extent_state, rb_node);
289                 if (other->end == state->start - 1 &&
290                     other->state == state->state) {
291                         state->start = other->start;
292                         other->tree = NULL;
293                         rb_erase(&other->rb_node, &tree->state);
294                         free_extent_state(other);
295                 }
296         }
297         other_node = rb_next(&state->rb_node);
298         if (other_node) {
299                 other = rb_entry(other_node, struct extent_state, rb_node);
300                 if (other->start == state->end + 1 &&
301                     other->state == state->state) {
302                         other->start = state->start;
303                         state->tree = NULL;
304                         rb_erase(&state->rb_node, &tree->state);
305                         free_extent_state(state);
306                 }
307         }
308         return 0;
309 }
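/*
 * Example: if the tree holds [0, 4095] and [4096, 8191] and both carry
 * exactly EXTENT_DIRTY, merge_state() called on either one collapses them
 * into a single [0, 8191] EXTENT_DIRTY record, freeing whichever
 * extent_state is no longer needed.  States carrying any EXTENT_IOBITS bit
 * are never merged.
 */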
310
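/*
 * set_state_cb/clear_state_cb notify the owner of the tree, via the
 * optional set_bit_hook/clear_bit_hook ops, that bits are being set or
 * cleared on a state's [start, end] range, so callers can do their own
 * per-range accounting.
 */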
311 static void set_state_cb(struct extent_io_tree *tree,
312                          struct extent_state *state,
313                          unsigned long bits)
314 {
315         if (tree->ops && tree->ops->set_bit_hook) {
316                 tree->ops->set_bit_hook(tree->mapping->host, state->start,
317                                         state->end, state->state, bits);
318         }
319 }
320
321 static void clear_state_cb(struct extent_io_tree *tree,
322                            struct extent_state *state,
323                            unsigned long bits)
324 {
325         if (tree->ops && tree->ops->clear_bit_hook) {
326                 tree->ops->clear_bit_hook(tree->mapping->host, state->start,
327                                           state->end, state->state, bits);
328         }
329 }
330
331 /*
332  * insert an extent_state struct into the tree.  'bits' are set on the
333  * struct before it is inserted.
334  *
335  * This may return -EEXIST if the extent is already there, in which case the
336  * state struct is freed.
337  *
338  * The tree lock is not taken internally.  This is a utility function and
339  * probably isn't what you want to call (see set/clear_extent_bit).
340  */
341 static int insert_state(struct extent_io_tree *tree,
342                         struct extent_state *state, u64 start, u64 end,
343                         int bits)
344 {
345         struct rb_node *node;
346
347         if (end < start) {
348                 printk("end < start %Lu %Lu\n", end, start);
349                 WARN_ON(1);
350         }
351         if (bits & EXTENT_DIRTY)
352                 tree->dirty_bytes += end - start + 1;
353         set_state_cb(tree, state, bits);
354         state->state |= bits;
355         state->start = start;
356         state->end = end;
357         node = tree_insert(&tree->state, end, &state->rb_node);
358         if (node) {
359                 struct extent_state *found;
360                 found = rb_entry(node, struct extent_state, rb_node);
361                 printk("found node %Lu %Lu on insert of %Lu %Lu\n",
                           found->start, found->end, start, end);
362                 free_extent_state(state);
363                 return -EEXIST;
364         }
365         state->tree = tree;
366         merge_state(tree, state);
367         return 0;
368 }
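/*
 * Example: inserting a new state for [0, 4095] into an empty tree returns 0
 * and the state is owned by the tree afterwards.  Inserting [0, 4095] again
 * (or any range overlapping an existing node) makes tree_insert() return
 * the colliding node, the new state is freed and -EEXIST comes back to the
 * caller.
 */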
369
370 /*
371  * split a given extent state struct in two, inserting the preallocated
372  * struct 'prealloc' as the newly created second half.  'split' indicates an
373  * offset inside 'orig' where it should be split.
374  *
375  * Before calling,
376  * the tree has 'orig' at [orig->start, orig->end].  After calling, there
377  * are two extent state structs in the tree:
378  * prealloc: [orig->start, split - 1]
379  * orig: [ split, orig->end ]
380  *
381  * The tree locks are not taken by this function. They need to be held
382  * by the caller.
383  */
384 static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
385                        struct extent_state *prealloc, u64 split)
386 {
387         struct rb_node *node;
388         prealloc->start = orig->start;
389         prealloc->end = split - 1;
390         prealloc->state = orig->state;
391         orig->start = split;
392
393         node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
394         if (node) {
395                 struct extent_state *found;
396                 found = rb_entry(node, struct extent_state, rb_node);
397                 printk("found node %Lu %Lu on insert of %Lu %Lu\n",
                           found->start, found->end,
                           prealloc->start, prealloc->end);
398                 free_extent_state(prealloc);
399                 return -EEXIST;
400         }
401         prealloc->tree = tree;
402         return 0;
403 }
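/*
 * Example: with orig covering [0, 8191] and split == 4096, the call leaves
 * prealloc at [0, 4095] and orig at [4096, 8191], both carrying the bits
 * orig had before the split.
 */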
404
405 /*
406  * utility function to clear some bits in an extent state struct.
407  * it will optionally wake up any one waiting on this state (wake == 1), or
408  * forcibly remove the state from the tree (delete == 1).
409  *
410  * If no bits are set on the state struct after clearing things, the
411  * struct is freed and removed from the tree
412  */
413 static int clear_state_bit(struct extent_io_tree *tree,
414                             struct extent_state *state, int bits, int wake,
415                             int delete)
416 {
417         int ret = state->state & bits;
418
419         if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
420                 u64 range = state->end - state->start + 1;
421                 WARN_ON(range > tree->dirty_bytes);
422                 tree->dirty_bytes -= range;
423         }
424         clear_state_cb(tree, state, bits);
425         state->state &= ~bits;
426         if (wake)
427                 wake_up(&state->wq);
428         if (delete || state->state == 0) {
429                 if (state->tree) {
430                         clear_state_cb(tree, state, state->state);
431                         rb_erase(&state->rb_node, &tree->state);
432                         state->tree = NULL;
433                         free_extent_state(state);
434                 } else {
435                         WARN_ON(1);
436                 }
437         } else {
438                 merge_state(tree, state);
439         }
440         return ret;
441 }
442
443 /*
444  * clear some bits on a range in the tree.  This may require splitting
445  * or inserting elements in the tree, so the gfp mask is used to
446  * indicate which allocations or sleeping are allowed.
447  *
448  * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
449  * the given range from the tree regardless of state (ie for truncate).
450  *
451  * the range [start, end] is inclusive.
452  *
453  * This takes the tree lock, and returns < 0 on error, > 0 if any of the
454  * bits were already set, or zero if none of the bits were already set.
455  */
456 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
457                      int bits, int wake, int delete, gfp_t mask)
458 {
459         struct extent_state *state;
460         struct extent_state *prealloc = NULL;
461         struct rb_node *node;
462         unsigned long flags;
463         int err;
464         int set = 0;
465
466 again:
467         if (!prealloc && (mask & __GFP_WAIT)) {
468                 prealloc = alloc_extent_state(mask);
469                 if (!prealloc)
470                         return -ENOMEM;
471         }
472
473         spin_lock_irqsave(&tree->lock, flags);
474         /*
475          * this search will find the extents that end after
476          * our range starts
477          */
478         node = tree_search(tree, start);
479         if (!node)
480                 goto out;
481         state = rb_entry(node, struct extent_state, rb_node);
482         if (state->start > end)
483                 goto out;
484         WARN_ON(state->end < start);
485
486         /*
487          *     | ---- desired range ---- |
488          *  | state | or
489          *  | ------------- state -------------- |
490          *
491          * We need to split the extent we found, and may flip
492          * bits on second half.
493          *
494          * If the extent we found extends past our range, we
495          * just split and search again.  It'll get split again
496          * the next time though.
497          *
498          * If the extent we found is inside our range, we clear
499          * the desired bit on it.
500          */
501
502         if (state->start < start) {
503                 if (!prealloc)
504                         prealloc = alloc_extent_state(GFP_ATOMIC);
505                 err = split_state(tree, state, prealloc, start);
506                 BUG_ON(err == -EEXIST);
507                 prealloc = NULL;
508                 if (err)
509                         goto out;
510                 if (state->end <= end) {
511                         start = state->end + 1;
512                         set |= clear_state_bit(tree, state, bits,
513                                         wake, delete);
514                 } else {
515                         start = state->start;
516                 }
517                 goto search_again;
518         }
519         /*
520          * | ---- desired range ---- |
521          *                        | state |
522          * We need to split the extent, and clear the bit
523          * on the first half
524          */
525         if (state->start <= end && state->end > end) {
526                 if (!prealloc)
527                         prealloc = alloc_extent_state(GFP_ATOMIC);
528                 err = split_state(tree, state, prealloc, end + 1);
529                 BUG_ON(err == -EEXIST);
530
531                 if (wake)
532                         wake_up(&state->wq);
533                 set |= clear_state_bit(tree, prealloc, bits,
534                                        wake, delete);
535                 prealloc = NULL;
536                 goto out;
537         }
538
539         start = state->end + 1;
540         set |= clear_state_bit(tree, state, bits, wake, delete);
541         goto search_again;
542
543 out:
544         spin_unlock_irqrestore(&tree->lock, flags);
545         if (prealloc)
546                 free_extent_state(prealloc);
547
548         return set;
549
550 search_again:
551         if (start > end)
552                 goto out;
553         spin_unlock_irqrestore(&tree->lock, flags);
554         if (mask & __GFP_WAIT)
555                 cond_resched();
556         goto again;
557 }
558 EXPORT_SYMBOL(clear_extent_bit);
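/*
 * Typical use, mirroring the wrappers further down: clearing dirty and
 * delalloc on a range while allowing the splits to sleep looks like
 *
 *	clear_extent_bit(tree, start, end,
 *			 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, GFP_NOFS);
 *
 * and truncate-style callers pass delete == 1 to drop the states no matter
 * which bits they carry.
 */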
559
560 static int wait_on_state(struct extent_io_tree *tree,
561                          struct extent_state *state)
562 {
563         DEFINE_WAIT(wait);
564         prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
565         spin_unlock_irq(&tree->lock);
566         schedule();
567         spin_lock_irq(&tree->lock);
568         finish_wait(&state->wq, &wait);
569         return 0;
570 }
571
572 /*
573  * waits for one or more bits to clear on a range in the state tree.
574  * The range [start, end] is inclusive.
575  * The tree lock is taken by this function
576  */
577 int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
578 {
579         struct extent_state *state;
580         struct rb_node *node;
581
582         spin_lock_irq(&tree->lock);
583 again:
584         while (1) {
585                 /*
586                  * this search will find all the extents that end after
587                  * our range starts
588                  */
589                 node = tree_search(tree, start);
590                 if (!node)
591                         break;
592
593                 state = rb_entry(node, struct extent_state, rb_node);
594
595                 if (state->start > end)
596                         goto out;
597
598                 if (state->state & bits) {
599                         start = state->start;
600                         atomic_inc(&state->refs);
601                         wait_on_state(tree, state);
602                         free_extent_state(state);
603                         goto again;
604                 }
605                 start = state->end + 1;
606
607                 if (start > end)
608                         break;
609
610                 if (need_resched()) {
611                         spin_unlock_irq(&tree->lock);
612                         cond_resched();
613                         spin_lock_irq(&tree->lock);
614                 }
615         }
616 out:
617         spin_unlock_irq(&tree->lock);
618         return 0;
619 }
620 EXPORT_SYMBOL(wait_extent_bit);
621
622 static void set_state_bits(struct extent_io_tree *tree,
623                            struct extent_state *state,
624                            int bits)
625 {
626         if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
627                 u64 range = state->end - state->start + 1;
628                 tree->dirty_bytes += range;
629         }
630         set_state_cb(tree, state, bits);
631         state->state |= bits;
632 }
633
634 /*
635  * set some bits on a range in the tree.  This may require allocations
636  * or sleeping, so the gfp mask is used to indicate what is allowed.
637  *
638  * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
639  * range already has the desired bits set.  The start of the existing
640  * range is returned in failed_start in this case.
641  *
642  * [start, end] is inclusive
643  * This takes the tree lock.
644  */
645 int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
646                    int exclusive, u64 *failed_start, gfp_t mask)
647 {
648         struct extent_state *state;
649         struct extent_state *prealloc = NULL;
650         struct rb_node *node;
651         unsigned long flags;
652         int err = 0;
653         int set;
654         u64 last_start;
655         u64 last_end;
656 again:
657         if (!prealloc && (mask & __GFP_WAIT)) {
658                 prealloc = alloc_extent_state(mask);
659                 if (!prealloc)
660                         return -ENOMEM;
661         }
662
663         spin_lock_irqsave(&tree->lock, flags);
664         /*
665          * this search will find all the extents that end after
666          * our range starts.
667          */
668         node = tree_search(tree, start);
669         if (!node) {
670                 err = insert_state(tree, prealloc, start, end, bits);
671                 prealloc = NULL;
672                 BUG_ON(err == -EEXIST);
673                 goto out;
674         }
675
676         state = rb_entry(node, struct extent_state, rb_node);
677         last_start = state->start;
678         last_end = state->end;
679
680         /*
681          * | ---- desired range ---- |
682          * | state |
683          *
684          * Just lock what we found and keep going
685          */
686         if (state->start == start && state->end <= end) {
687                 set = state->state & bits;
688                 if (set && exclusive) {
689                         *failed_start = state->start;
690                         err = -EEXIST;
691                         goto out;
692                 }
693                 set_state_bits(tree, state, bits);
694                 start = state->end + 1;
695                 merge_state(tree, state);
696                 goto search_again;
697         }
698
699         /*
700          *     | ---- desired range ---- |
701          * | state |
702          *   or
703          * | ------------- state -------------- |
704          *
705          * We need to split the extent we found, and may flip bits on
706          * second half.
707          *
708          * If the extent we found extends past our
709          * range, we just split and search again.  It'll get split
710          * again the next time though.
711          *
712          * If the extent we found is inside our range, we set the
713          * desired bit on it.
714          */
715         if (state->start < start) {
716                 set = state->state & bits;
717                 if (exclusive && set) {
718                         *failed_start = start;
719                         err = -EEXIST;
720                         goto out;
721                 }
722                 err = split_state(tree, state, prealloc, start);
723                 BUG_ON(err == -EEXIST);
724                 prealloc = NULL;
725                 if (err)
726                         goto out;
727                 if (state->end <= end) {
728                         set_state_bits(tree, state, bits);
729                         start = state->end + 1;
730                         merge_state(tree, state);
731                 } else {
732                         start = state->start;
733                 }
734                 goto search_again;
735         }
736         /*
737          * | ---- desired range ---- |
738          *     | state | or               | state |
739          *
740          * There's a hole, we need to insert something in it and
741          * ignore the extent we found.
742          */
743         if (state->start > start) {
744                 u64 this_end;
745                 if (end < last_start)
746                         this_end = end;
747                 else
748                         this_end = last_start - 1;
749                 err = insert_state(tree, prealloc, start, this_end,
750                                    bits);
751                 prealloc = NULL;
752                 BUG_ON(err == -EEXIST);
753                 if (err)
754                         goto out;
755                 start = this_end + 1;
756                 goto search_again;
757         }
758         /*
759          * | ---- desired range ---- |
760          *                        | state |
761          * We need to split the extent, and set the bit
762          * on the first half
763          */
764         if (state->start <= end && state->end > end) {
765                 set = state->state & bits;
766                 if (exclusive && set) {
767                         *failed_start = start;
768                         err = -EEXIST;
769                         goto out;
770                 }
771                 err = split_state(tree, state, prealloc, end + 1);
772                 BUG_ON(err == -EEXIST);
773
774                 set_state_bits(tree, prealloc, bits);
775                 merge_state(tree, prealloc);
776                 prealloc = NULL;
777                 goto out;
778         }
779
780         goto search_again;
781
782 out:
783         spin_unlock_irqrestore(&tree->lock, flags);
784         if (prealloc)
785                 free_extent_state(prealloc);
786
787         return err;
788
789 search_again:
790         if (start > end)
791                 goto out;
792         spin_unlock_irqrestore(&tree->lock, flags);
793         if (mask & __GFP_WAIT)
794                 cond_resched();
795         goto again;
796 }
797 EXPORT_SYMBOL(set_extent_bit);
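/*
 * The exclusive mode is how range locking is built: lock_extent() below
 * loops on
 *
 *	set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
 *		       &failed_start, mask);
 *
 * and, on -EEXIST, waits for EXTENT_LOCKED to clear at failed_start before
 * retrying.  Non-exclusive callers just pass 0 and NULL.
 */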
798
799 /* wrappers around set/clear extent bit */
800 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
801                      gfp_t mask)
802 {
803         return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
804                               mask);
805 }
806 EXPORT_SYMBOL(set_extent_dirty);
807
808 int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
809                        gfp_t mask)
810 {
811         return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
812 }
813 EXPORT_SYMBOL(set_extent_ordered);
814
815 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
816                     int bits, gfp_t mask)
817 {
818         return set_extent_bit(tree, start, end, bits, 0, NULL,
819                               mask);
820 }
821 EXPORT_SYMBOL(set_extent_bits);
822
823 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
824                       int bits, gfp_t mask)
825 {
826         return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
827 }
828 EXPORT_SYMBOL(clear_extent_bits);
829
830 int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
831                      gfp_t mask)
832 {
833         return set_extent_bit(tree, start, end,
834                               EXTENT_DELALLOC | EXTENT_DIRTY,
835                               0, NULL, mask);
836 }
837 EXPORT_SYMBOL(set_extent_delalloc);
838
839 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
840                        gfp_t mask)
841 {
842         return clear_extent_bit(tree, start, end,
843                                 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
844 }
845 EXPORT_SYMBOL(clear_extent_dirty);
846
847 int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
848                          gfp_t mask)
849 {
850         return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
851 }
852 EXPORT_SYMBOL(clear_extent_ordered);
853
854 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
855                      gfp_t mask)
856 {
857         return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
858                               mask);
859 }
860 EXPORT_SYMBOL(set_extent_new);
861
862 int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
863                        gfp_t mask)
864 {
865         return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
866 }
867 EXPORT_SYMBOL(clear_extent_new);
868
869 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
870                         gfp_t mask)
871 {
872         return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
873                               mask);
874 }
875 EXPORT_SYMBOL(set_extent_uptodate);
876
877 int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
878                           gfp_t mask)
879 {
880         return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
881 }
882 EXPORT_SYMBOL(clear_extent_uptodate);
883
884 int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
885                          gfp_t mask)
886 {
887         return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
888                               0, NULL, mask);
889 }
890 EXPORT_SYMBOL(set_extent_writeback);
891
892 int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
893                            gfp_t mask)
894 {
895         return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
896 }
897 EXPORT_SYMBOL(clear_extent_writeback);
898
899 int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
900 {
901         return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
902 }
903 EXPORT_SYMBOL(wait_on_extent_writeback);
904
905 int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
906 {
907         int err;
908         u64 failed_start;
909         while (1) {
910                 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
911                                      &failed_start, mask);
912                 if (err == -EEXIST && (mask & __GFP_WAIT)) {
913                         wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
914                         start = failed_start;
915                 } else {
916                         break;
917                 }
918                 WARN_ON(start > end);
919         }
920         return err;
921 }
922 EXPORT_SYMBOL(lock_extent);
923
924 int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
925                   gfp_t mask)
926 {
927         return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
928 }
929 EXPORT_SYMBOL(unlock_extent);
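/*
 * lock_extent()/unlock_extent() are used as a byte-range lock around I/O,
 * for example:
 *
 *	lock_extent(tree, start, end, GFP_NOFS);
 *	... read or modify the range ...
 *	unlock_extent(tree, start, end, GFP_NOFS);
 *
 * lock_range() further down pairs this with locking the pages that back
 * the range.
 */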
930
931 /*
932  * helper function to set pages and extents in the tree dirty
933  */
934 int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
935 {
936         unsigned long index = start >> PAGE_CACHE_SHIFT;
937         unsigned long end_index = end >> PAGE_CACHE_SHIFT;
938         struct page *page;
939
940         while (index <= end_index) {
941                 page = find_get_page(tree->mapping, index);
942                 BUG_ON(!page);
943                 __set_page_dirty_nobuffers(page);
944                 page_cache_release(page);
945                 index++;
946         }
947         set_extent_dirty(tree, start, end, GFP_NOFS);
948         return 0;
949 }
950 EXPORT_SYMBOL(set_range_dirty);
951
952 /*
953  * helper function to set both pages and extents in the tree writeback
954  */
955 int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
956 {
957         unsigned long index = start >> PAGE_CACHE_SHIFT;
958         unsigned long end_index = end >> PAGE_CACHE_SHIFT;
959         struct page *page;
960
961         while (index <= end_index) {
962                 page = find_get_page(tree->mapping, index);
963                 BUG_ON(!page);
964                 set_page_writeback(page);
965                 page_cache_release(page);
966                 index++;
967         }
968         set_extent_writeback(tree, start, end, GFP_NOFS);
969         return 0;
970 }
971 EXPORT_SYMBOL(set_range_writeback);
972
973 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
974                           u64 *start_ret, u64 *end_ret, int bits)
975 {
976         struct rb_node *node;
977         struct extent_state *state;
978         int ret = 1;
979
980         spin_lock_irq(&tree->lock);
981         /*
982          * this search will find all the extents that end after
983          * our range starts.
984          */
985         node = tree_search(tree, start);
986         if (!node) {
987                 goto out;
988         }
989
990         while(1) {
991                 state = rb_entry(node, struct extent_state, rb_node);
992                 if (state->end >= start && (state->state & bits)) {
993                         *start_ret = state->start;
994                         *end_ret = state->end;
995                         ret = 0;
996                         break;
997                 }
998                 node = rb_next(node);
999                 if (!node)
1000                         break;
1001         }
1002 out:
1003         spin_unlock_irq(&tree->lock);
1004         return ret;
1005 }
1006 EXPORT_SYMBOL(find_first_extent_bit);
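/*
 * Example: to find the first dirty range at or after 'start':
 *
 *	u64 found_start, found_end;
 *	if (find_first_extent_bit(tree, start, &found_start, &found_end,
 *				  EXTENT_DIRTY) == 0)
 *		... [found_start, found_end] carries EXTENT_DIRTY ...
 *
 * A non-zero return means nothing with the requested bits was found.
 */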
1007
1008 struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
1009                                                  u64 start, int bits)
1010 {
1011         struct rb_node *node;
1012         struct extent_state *state;
1013
1014         /*
1015          * this search will find all the extents that end after
1016          * our range starts.
1017          */
1018         node = tree_search(tree, start);
1019         if (!node) {
1020                 goto out;
1021         }
1022
1023         while(1) {
1024                 state = rb_entry(node, struct extent_state, rb_node);
1025                 if (state->end >= start && (state->state & bits)) {
1026                         return state;
1027                 }
1028                 node = rb_next(node);
1029                 if (!node)
1030                         break;
1031         }
1032 out:
1033         return NULL;
1034 }
1035 EXPORT_SYMBOL(find_first_extent_bit_state);
1036
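/*
 * find a contiguous run of EXTENT_DELALLOC extents starting at or after
 * *start, walk back to the beginning of that run, and mark each state in it
 * EXTENT_LOCKED (waiting for any that are already locked).  On return
 * *start/*end bound the locked run, the return value is the number of
 * extent states locked, and the scan stops once max_bytes is covered.
 */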
1037 u64 find_lock_delalloc_range(struct extent_io_tree *tree,
1038                              u64 *start, u64 *end, u64 max_bytes)
1039 {
1040         struct rb_node *node;
1041         struct extent_state *state;
1042         u64 cur_start = *start;
1043         u64 found = 0;
1044         u64 total_bytes = 0;
1045
1046         spin_lock_irq(&tree->lock);
1047         /*
1048          * this search will find all the extents that end after
1049          * our range starts.
1050          */
1051 search_again:
1052         node = tree_search(tree, cur_start);
1053         if (!node) {
1054                 if (!found)
1055                         *end = (u64)-1;
1056                 goto out;
1057         }
1058
1059         while(1) {
1060                 state = rb_entry(node, struct extent_state, rb_node);
1061                 if (found && state->start != cur_start) {
1062                         goto out;
1063                 }
1064                 if (!(state->state & EXTENT_DELALLOC)) {
1065                         if (!found)
1066                                 *end = state->end;
1067                         goto out;
1068                 }
1069                 if (!found) {
1070                         struct extent_state *prev_state;
1071                         struct rb_node *prev_node = node;
1072                         while(1) {
1073                                 prev_node = rb_prev(prev_node);
1074                                 if (!prev_node)
1075                                         break;
1076                                 prev_state = rb_entry(prev_node,
1077                                                       struct extent_state,
1078                                                       rb_node);
1079                                 if (!(prev_state->state & EXTENT_DELALLOC))
1080                                         break;
1081                                 state = prev_state;
1082                                 node = prev_node;
1083                         }
1084                 }
1085                 if (state->state & EXTENT_LOCKED) {
1086                         DEFINE_WAIT(wait);
1087                         atomic_inc(&state->refs);
1088                         prepare_to_wait(&state->wq, &wait,
1089                                         TASK_UNINTERRUPTIBLE);
1090                         spin_unlock_irq(&tree->lock);
1091                         schedule();
1092                         spin_lock_irq(&tree->lock);
1093                         finish_wait(&state->wq, &wait);
1094                         free_extent_state(state);
1095                         goto search_again;
1096                 }
1097                 set_state_cb(tree, state, EXTENT_LOCKED);
1098                 state->state |= EXTENT_LOCKED;
1099                 if (!found)
1100                         *start = state->start;
1101                 found++;
1102                 *end = state->end;
1103                 cur_start = state->end + 1;
1104                 node = rb_next(node);
1105                 if (!node)
1106                         break;
1107                 total_bytes += state->end - state->start + 1;
1108                 if (total_bytes >= max_bytes)
1109                         break;
1110         }
1111 out:
1112         spin_unlock_irq(&tree->lock);
1113         return found;
1114 }
1115
1116 u64 count_range_bits(struct extent_io_tree *tree,
1117                      u64 *start, u64 search_end, u64 max_bytes,
1118                      unsigned long bits)
1119 {
1120         struct rb_node *node;
1121         struct extent_state *state;
1122         u64 cur_start = *start;
1123         u64 total_bytes = 0;
1124         int found = 0;
1125
1126         if (search_end <= cur_start) {
1127                 printk("search_end %Lu start %Lu\n", search_end, cur_start);
1128                 WARN_ON(1);
1129                 return 0;
1130         }
1131
1132         spin_lock_irq(&tree->lock);
1133         if (cur_start == 0 && bits == EXTENT_DIRTY) {
1134                 total_bytes = tree->dirty_bytes;
1135                 goto out;
1136         }
1137         /*
1138          * this search will find all the extents that end after
1139          * our range starts.
1140          */
1141         node = tree_search(tree, cur_start);
1142         if (!node) {
1143                 goto out;
1144         }
1145
1146         while(1) {
1147                 state = rb_entry(node, struct extent_state, rb_node);
1148                 if (state->start > search_end)
1149                         break;
1150                 if (state->end >= cur_start && (state->state & bits)) {
1151                         total_bytes += min(search_end, state->end) + 1 -
1152                                        max(cur_start, state->start);
1153                         if (total_bytes >= max_bytes)
1154                                 break;
1155                         if (!found) {
1156                                 *start = state->start;
1157                                 found = 1;
1158                         }
1159                 }
1160                 node = rb_next(node);
1161                 if (!node)
1162                         break;
1163         }
1164 out:
1165         spin_unlock_irq(&tree->lock);
1166         return total_bytes;
1167 }
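/*
 * Example: counting how many dirty bytes fall inside [start, end]:
 *
 *	u64 first = start;
 *	u64 bytes = count_range_bits(tree, &first, end, (u64)-1,
 *				     EXTENT_DIRTY);
 *
 * 'first' is set to the start of the first matching extent, and the special
 * case of start == 0 with EXTENT_DIRTY simply returns tree->dirty_bytes.
 */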
1168 /*
1169  * helper function to lock both pages and extents in the tree.
1170  * pages must be locked first.
1171  */
1172 int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
1173 {
1174         unsigned long index = start >> PAGE_CACHE_SHIFT;
1175         unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1176         struct page *page;
1177         int err;
1178
1179         while (index <= end_index) {
1180                 page = grab_cache_page(tree->mapping, index);
1181                 if (!page) {
1182                         err = -ENOMEM;
1183                         goto failed;
1184                 }
1185                 if (IS_ERR(page)) {
1186                         err = PTR_ERR(page);
1187                         goto failed;
1188                 }
1189                 index++;
1190         }
1191         lock_extent(tree, start, end, GFP_NOFS);
1192         return 0;
1193
1194 failed:
1195         /*
1196          * we failed above in getting the page at 'index', so we undo here
1197          * up to but not including the page at 'index'
1198          */
1199         end_index = index;
1200         index = start >> PAGE_CACHE_SHIFT;
1201         while (index < end_index) {
1202                 page = find_get_page(tree->mapping, index);
1203                 unlock_page(page);
1204                 page_cache_release(page);
1205                 index++;
1206         }
1207         return err;
1208 }
1209 EXPORT_SYMBOL(lock_range);
1210
1211 /*
1212  * helper function to unlock both pages and extents in the tree.
1213  */
1214 int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
1215 {
1216         unsigned long index = start >> PAGE_CACHE_SHIFT;
1217         unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1218         struct page *page;
1219
1220         while (index <= end_index) {
1221                 page = find_get_page(tree->mapping, index);
1222                 unlock_page(page);
1223                 page_cache_release(page);
1224                 index++;
1225         }
1226         unlock_extent(tree, start, end, GFP_NOFS);
1227         return 0;
1228 }
1229 EXPORT_SYMBOL(unlock_range);
1230
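/*
 * set_state_private/get_state_private stash and fetch a caller-owned u64 in
 * the extent_state whose start is exactly 'start'.  Both return -ENOENT if
 * no state begins at that offset, so the caller must have created the state
 * (e.g. by locking the range) beforehand.
 */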
1231 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1232 {
1233         struct rb_node *node;
1234         struct extent_state *state;
1235         int ret = 0;
1236
1237         spin_lock_irq(&tree->lock);
1238         /*
1239          * this search will find all the extents that end after
1240          * our range starts.
1241          */
1242         node = tree_search(tree, start);
1243         if (!node) {
1244                 ret = -ENOENT;
1245                 goto out;
1246         }
1247         state = rb_entry(node, struct extent_state, rb_node);
1248         if (state->start != start) {
1249                 ret = -ENOENT;
1250                 goto out;
1251         }
1252         state->private = private;
1253 out:
1254         spin_unlock_irq(&tree->lock);
1255         return ret;
1256 }
1257
1258 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1259 {
1260         struct rb_node *node;
1261         struct extent_state *state;
1262         int ret = 0;
1263
1264         spin_lock_irq(&tree->lock);
1265         /*
1266          * this search will find all the extents that end after
1267          * our range starts.
1268          */
1269         node = tree_search(tree, start);
1270         if (!node) {
1271                 ret = -ENOENT;
1272                 goto out;
1273         }
1274         state = rb_entry(node, struct extent_state, rb_node);
1275         if (state->start != start) {
1276                 ret = -ENOENT;
1277                 goto out;
1278         }
1279         *private = state->private;
1280 out:
1281         spin_unlock_irq(&tree->lock);
1282         return ret;
1283 }
1284
1285 /*
1286  * searches a range in the state tree for a given mask.
1287  * If 'filled' == 1, this returns 1 only if every extent in the tree
1288  * has the bits set.  Otherwise, 1 is returned if any bit in the
1289  * range is found set.
1290  */
1291 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1292                    int bits, int filled)
1293 {
1294         struct extent_state *state = NULL;
1295         struct rb_node *node;
1296         int bitset = 0;
1297         unsigned long flags;
1298
1299         spin_lock_irqsave(&tree->lock, flags);
1300         node = tree_search(tree, start);
1301         while (node && start <= end) {
1302                 state = rb_entry(node, struct extent_state, rb_node);
1303
1304                 if (filled && state->start > start) {
1305                         bitset = 0;
1306                         break;
1307                 }
1308
1309                 if (state->start > end)
1310                         break;
1311
1312                 if (state->state & bits) {
1313                         bitset = 1;
1314                         if (!filled)
1315                                 break;
1316                 } else if (filled) {
1317                         bitset = 0;
1318                         break;
1319                 }
1320                 start = state->end + 1;
1321                 if (start > end)
1322                         break;
1323                 node = rb_next(node);
1324                 if (!node) {
1325                         if (filled)
1326                                 bitset = 0;
1327                         break;
1328                 }
1329         }
1330         spin_unlock_irqrestore(&tree->lock, flags);
1331         return bitset;
1332 }
1333 EXPORT_SYMBOL(test_range_bit);
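/*
 * Example: with only [0, 4095] marked EXTENT_UPTODATE in the tree,
 *
 *	test_range_bit(tree, 0, 8191, EXTENT_UPTODATE, 1) returns 0
 *	test_range_bit(tree, 0, 8191, EXTENT_UPTODATE, 0) returns 1
 *
 * which is exactly the difference check_page_uptodate() (filled == 1) and
 * check_page_locked() (filled == 0) below rely on.
 */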
1334
1335 /*
1336  * helper function to set a given page up to date if all the
1337  * extents in the tree for that page are up to date
1338  */
1339 static int check_page_uptodate(struct extent_io_tree *tree,
1340                                struct page *page)
1341 {
1342         u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1343         u64 end = start + PAGE_CACHE_SIZE - 1;
1344         if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
1345                 SetPageUptodate(page);
1346         return 0;
1347 }
1348
1349 /*
1350  * helper function to unlock a page if all the extents in the tree
1351  * for that page are unlocked
1352  */
1353 static int check_page_locked(struct extent_io_tree *tree,
1354                              struct page *page)
1355 {
1356         u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1357         u64 end = start + PAGE_CACHE_SIZE - 1;
1358         if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
1359                 unlock_page(page);
1360         return 0;
1361 }
1362
1363 /*
1364  * helper function to end page writeback if all the extents
1365  * in the tree for that page are done with writeback
1366  */
1367 static int check_page_writeback(struct extent_io_tree *tree,
1368                              struct page *page)
1369 {
1370         u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1371         u64 end = start + PAGE_CACHE_SIZE - 1;
1372         if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1373                 end_page_writeback(page);
1374         return 0;
1375 }
1376
1377 /* lots and lots of room for performance fixes in the end_bio funcs */
1378
1379 /*
1380  * after a writepage IO is done, we need to:
1381  * clear the uptodate bits on error
1382  * clear the writeback bits in the extent tree for this IO
1383  * end_page_writeback if the page has no more pending IO
1384  *
1385  * Scheduling is not allowed, so the extent state tree is expected
1386  * to have one and only one object corresponding to this IO.
1387  */
1388 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1389 static void end_bio_extent_writepage(struct bio *bio, int err)
1390 #else
1391 static int end_bio_extent_writepage(struct bio *bio,
1392                                    unsigned int bytes_done, int err)
1393 #endif
1394 {
1395         int uptodate = err == 0;
1396         struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1397         struct extent_state *state = bio->bi_private;
1398         struct extent_io_tree *tree = state->tree;
1399         struct rb_node *node;
1400         u64 start;
1401         u64 end;
1402         u64 cur;
1403         int whole_page;
1404         int ret;
1405         unsigned long flags;
1406
1407 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1408         if (bio->bi_size)
1409                 return 1;
1410 #endif
1411         do {
1412                 struct page *page = bvec->bv_page;
1413                 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1414                          bvec->bv_offset;
1415                 end = start + bvec->bv_len - 1;
1416
1417                 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1418                         whole_page = 1;
1419                 else
1420                         whole_page = 0;
1421
1422                 if (--bvec >= bio->bi_io_vec)
1423                         prefetchw(&bvec->bv_page->flags);
1424                 if (tree->ops && tree->ops->writepage_end_io_hook) {
1425                         ret = tree->ops->writepage_end_io_hook(page, start,
1426                                                        end, state, uptodate);
1427                         if (ret)
1428                                 uptodate = 0;
1429                 }
1430
1431                 if (!uptodate && tree->ops &&
1432                     tree->ops->writepage_io_failed_hook) {
1433                         ret = tree->ops->writepage_io_failed_hook(bio, page,
1434                                                          start, end, state);
1435                         if (ret == 0) {
1436                                 state = NULL;
1437                                 uptodate = (err == 0);
1438                                 continue;
1439                         }
1440                 }
1441
1442                 if (!uptodate) {
1443                         clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
1444                         ClearPageUptodate(page);
1445                         SetPageError(page);
1446                 }
1447
1448                 /*
1449                  * bios can get merged in funny ways, and so we need to
1450                  * be careful with the state variable.  We know the
1451                  * state won't be merged with others because it has
1452                  * WRITEBACK set, but we can't be sure each biovec is
1453                  * sequential in the file.  So, if our cached state
1454                  * doesn't match the expected end, search the tree
1455                  * for the correct one.
1456                  */
1457
1458                 spin_lock_irqsave(&tree->lock, flags);
1459                 if (!state || state->end != end) {
1460                         state = NULL;
1461                         node = __etree_search(tree, start, NULL, NULL);
1462                         if (node) {
1463                                 state = rb_entry(node, struct extent_state,
1464                                                  rb_node);
1465                                 if (state->end != end ||
1466                                     !(state->state & EXTENT_WRITEBACK))
1467                                         state = NULL;
1468                         }
1469                         if (!state) {
1470                                 spin_unlock_irqrestore(&tree->lock, flags);
1471                                 clear_extent_writeback(tree, start,
1472                                                        end, GFP_ATOMIC);
1473                                 goto next_io;
1474                         }
1475                 }
1476                 cur = end;
1477                 while(1) {
1478                         struct extent_state *clear = state;
1479                         cur = state->start;
1480                         node = rb_prev(&state->rb_node);
1481                         if (node) {
1482                                 state = rb_entry(node,
1483                                                  struct extent_state,
1484                                                  rb_node);
1485                         } else {
1486                                 state = NULL;
1487                         }
1488
1489                         clear_state_bit(tree, clear, EXTENT_WRITEBACK,
1490                                         1, 0);
1491                         if (cur == start)
1492                                 break;
1493                         if (cur < start) {
1494                                 WARN_ON(1);
1495                                 break;
1496                         }
1497                         if (!node)
1498                                 break;
1499                 }
1500                 /* before releasing the lock, make sure the next state
1501                  * variable has the expected bits set and corresponds
1502                  * to the correct offsets in the file
1503                  */
1504                 if (state && (state->end + 1 != start ||
1505                     !(state->state & EXTENT_WRITEBACK))) {
1506                         state = NULL;
1507                 }
1508                 spin_unlock_irqrestore(&tree->lock, flags);
1509 next_io:
1510
1511                 if (whole_page)
1512                         end_page_writeback(page);
1513                 else
1514                         check_page_writeback(tree, page);
1515         } while (bvec >= bio->bi_io_vec);
1516         bio_put(bio);
1517 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1518         return 0;
1519 #endif
1520 }
1521
1522 /*
1523  * after a readpage IO is done, we need to:
1524  * clear the uptodate bits on error
1525  * set the uptodate bits if things worked
1526  * set the page up to date if all extents in the tree are uptodate
1527  * clear the lock bit in the extent tree
1528  * unlock the page if there are no other extents locked for it
1529  *
1530  * Scheduling is not allowed, so the extent state tree is expected
1531  * to have one and only one object corresponding to this IO.
1532  */
1533 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1534 static void end_bio_extent_readpage(struct bio *bio, int err)
1535 #else
1536 static int end_bio_extent_readpage(struct bio *bio,
1537                                    unsigned int bytes_done, int err)
1538 #endif
1539 {
1540         int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1541         struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1542         struct extent_state *state = bio->bi_private;
1543         struct extent_io_tree *tree = state->tree;
1544         struct rb_node *node;
1545         u64 start;
1546         u64 end;
1547         u64 cur;
1548         unsigned long flags;
1549         int whole_page;
1550         int ret;
1551
1552 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1553         if (bio->bi_size)
1554                 return 1;
1555 #endif
1556
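        /*
         * the bio_vec array is walked from the last entry back to the first;
         * each bvec corresponds to one [start, end] byte range of a page in
         * the file.
         */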
1557         do {
1558                 struct page *page = bvec->bv_page;
1559                 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1560                         bvec->bv_offset;
1561                 end = start + bvec->bv_len - 1;
1562
1563                 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1564                         whole_page = 1;
1565                 else
1566                         whole_page = 0;
1567
1568                 if (--bvec >= bio->bi_io_vec)
1569                         prefetchw(&bvec->bv_page->flags);
1570
1571                 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1572                         ret = tree->ops->readpage_end_io_hook(page, start, end,
1573                                                               state);
1574                         if (ret)
1575                                 uptodate = 0;
1576                 }
1577                 if (!uptodate && tree->ops &&
1578                     tree->ops->readpage_io_failed_hook) {
1579                         ret = tree->ops->readpage_io_failed_hook(bio, page,
1580                                                          start, end, state);
1581                         if (ret == 0) {
1582                                 state = NULL;
1583                                 uptodate =
1584                                         test_bit(BIO_UPTODATE, &bio->bi_flags);
1585                                 continue;
1586                         }
1587                 }
1588
1589                 spin_lock_irqsave(&tree->lock, flags);
1590                 if (!state || state->end != end) {
1591                         state = NULL;
1592                         node = __etree_search(tree, start, NULL, NULL);
1593                         if (node) {
1594                                 state = rb_entry(node, struct extent_state,
1595                                                  rb_node);
1596                                 if (state->end != end ||
1597                                     !(state->state & EXTENT_LOCKED))
1598                                         state = NULL;
1599                         }
1600                         if (!state) {
1601                                 spin_unlock_irqrestore(&tree->lock, flags);
1602                                 if (uptodate)
1603                                         set_extent_uptodate(tree, start, end,
1604                                                             GFP_ATOMIC);
1605                                 unlock_extent(tree, start, end, GFP_ATOMIC);
1606                                 goto next_io;
1607                         }
1608                 }
1609
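                /*
                 * same backwards walk as the writepage completion: visit every
                 * state covering [start, end], mark it EXTENT_UPTODATE when
                 * the read succeeded, then drop EXTENT_LOCKED so waiters can
                 * make progress.
                 */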
1610                 cur = end;
1611                 while (1) {
1612                         struct extent_state *clear = state;
1613                         cur = state->start;
1614                         node = rb_prev(&state->rb_node);
1615                         if (node) {
1616                                 state = rb_entry(node,
1617                                          struct extent_state,
1618                                          rb_node);
1619                         } else {
1620                                 state = NULL;
1621                         }
1622                         if (uptodate) {
1623                                 set_state_cb(tree, clear, EXTENT_UPTODATE);
1624                                 clear->state |= EXTENT_UPTODATE;
1625                         }
1626                         clear_state_bit(tree, clear, EXTENT_LOCKED,
1627                                         1, 0);
1628                         if (cur == start)
1629                                 break;
1630                         if (cur < start) {
1631                                 WARN_ON(1);
1632                                 break;
1633                         }
1634                         if (!node)
1635                                 break;
1636                 }
1637                 /* before releasing the lock, make sure the next state
1638                  * variable has the expected bits set and corresponds
1639                  * to the correct offsets in the file
1640                  */
1641                 if (state && (state->end + 1 != start ||
1642                     !(state->state & EXTENT_LOCKED))) {
1643                         state = NULL;
1644                 }
1645                 spin_unlock_irqrestore(&tree->lock, flags);
1646 next_io:
1647                 if (whole_page) {
1648                         if (uptodate) {
1649                                 SetPageUptodate(page);
1650                         } else {
1651                                 ClearPageUptodate(page);
1652                                 SetPageError(page);
1653                         }
1654                         unlock_page(page);
1655                 } else {
1656                         if (uptodate) {
1657                                 check_page_uptodate(tree, page);
1658                         } else {
1659                                 ClearPageUptodate(page);
1660                                 SetPageError(page);
1661                         }
1662                         check_page_locked(tree, page);
1663                 }
1664         } while (bvec >= bio->bi_io_vec);
1665
1666         bio_put(bio);
1667 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1668         return 0;
1669 #endif
1670 }
1671
1672 /*
1673  * IO done from prepare_write is pretty simple, we just unlock
1674  * the structs in the extent tree when done, and set the uptodate bits
1675  * as appropriate.
1676  */
1677 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1678 static void end_bio_extent_preparewrite(struct bio *bio, int err)
1679 #else
1680 static int end_bio_extent_preparewrite(struct bio *bio,
1681                                        unsigned int bytes_done, int err)
1682 #endif
1683 {
1684         const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1685         struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1686         struct extent_state *state = bio->bi_private;
1687         struct extent_io_tree *tree = state->tree;
1688         u64 start;
1689         u64 end;
1690
1691 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1692         if (bio->bi_size)
1693                 return 1;
1694 #endif
1695
1696         do {
1697                 struct page *page = bvec->bv_page;
1698                 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1699                         bvec->bv_offset;
1700                 end = start + bvec->bv_len - 1;
1701
1702                 if (--bvec >= bio->bi_io_vec)
1703                         prefetchw(&bvec->bv_page->flags);
1704
1705                 if (uptodate) {
1706                         set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1707                 } else {
1708                         ClearPageUptodate(page);
1709                         SetPageError(page);
1710                 }
1711
1712                 unlock_extent(tree, start, end, GFP_ATOMIC);
1713
1714         } while (bvec >= bio->bi_io_vec);
1715
1716         bio_put(bio);
1717 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1718         return 0;
1719 #endif
1720 }
1721
1722 static struct bio *
1723 extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
1724                  gfp_t gfp_flags)
1725 {
1726         struct bio *bio;
1727
1728         bio = bio_alloc(gfp_flags, nr_vecs);
1729
1730         if (bio == NULL && (current->flags & PF_MEMALLOC)) {
1731                 while (!bio && (nr_vecs /= 2))
1732                         bio = bio_alloc(gfp_flags, nr_vecs);
1733         }
1734
1735         if (bio) {
1736                 bio->bi_size = 0;
1737                 bio->bi_bdev = bdev;
1738                 bio->bi_sector = first_sector;
1739         }
1740         return bio;
1741 }
1742
1743 static int submit_one_bio(int rw, struct bio *bio, int mirror_num)
1744 {
1745         int ret = 0;
1746         struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1747         struct page *page = bvec->bv_page;
1748         struct extent_io_tree *tree = bio->bi_private;
1749         struct rb_node *node;
1750         struct extent_state *state;
1751         u64 start;
1752         u64 end;
1753
1754         start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1755         end = start + bvec->bv_len - 1;
1756
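        /*
         * look up the extent_state whose range ends at the last byte covered
         * by this bio and stash it in bi_private, so the completion handler
         * starts out with a cached state instead of searching the tree.
         */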
1757         spin_lock_irq(&tree->lock);
1758         node = __etree_search(tree, start, NULL, NULL);
1759         BUG_ON(!node);
1760         state = rb_entry(node, struct extent_state, rb_node);
1761         while (state->end < end) {
1762                 node = rb_next(node);
1763                 state = rb_entry(node, struct extent_state, rb_node);
1764         }
1765         BUG_ON(state->end != end);
1766         spin_unlock_irq(&tree->lock);
1767
1768         bio->bi_private = state;
1769
1770         bio_get(bio);
1771
1772         if (tree->ops && tree->ops->submit_bio_hook)
1773                 tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
1774                                            mirror_num);
1775         else
1776                 submit_bio(rw, bio);
1777         if (bio_flagged(bio, BIO_EOPNOTSUPP))
1778                 ret = -EOPNOTSUPP;
1779         bio_put(bio);
1780         return ret;
1781 }
1782
1783 static int submit_extent_page(int rw, struct extent_io_tree *tree,
1784                               struct page *page, sector_t sector,
1785                               size_t size, unsigned long offset,
1786                               struct block_device *bdev,
1787                               struct bio **bio_ret,
1788                               unsigned long max_pages,
1789                               bio_end_io_t end_io_func,
1790                               int mirror_num)
1791 {
1792         int ret = 0;
1793         struct bio *bio;
1794         int nr;
1795
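        /*
         * if the caller is building up a bio, try to tack this page onto it:
         * the sector must be contiguous with the bio, the merge_bio_hook (if
         * any) must approve, and bio_add_page() must accept the full length.
         * Otherwise the old bio is submitted and a fresh one is allocated
         * below.
         */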
1796         if (bio_ret && *bio_ret) {
1797                 bio = *bio_ret;
1798                 if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
1799                     (tree->ops && tree->ops->merge_bio_hook &&
1800                      tree->ops->merge_bio_hook(page, offset, size, bio)) ||
1801                     bio_add_page(bio, page, size, offset) < size) {
1802                         ret = submit_one_bio(rw, bio, mirror_num);
1803                         bio = NULL;
1804                 } else {
1805                         return 0;
1806                 }
1807         }
1808         nr = bio_get_nr_vecs(bdev);
1809         bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
1810         if (!bio) {
1811                 printk(KERN_ERR "failed to allocate bio nr %d\n", nr);
                     if (bio_ret)
                             *bio_ret = NULL;
                     return -ENOMEM;
1812         }
1813
1814
1815         bio_add_page(bio, page, size, offset);
1816         bio->bi_end_io = end_io_func;
1817         bio->bi_private = tree;
1818
1819         if (bio_ret) {
1820                 *bio_ret = bio;
1821         } else {
1822                 ret = submit_one_bio(rw, bio, mirror_num);
1823         }
1824
1825         return ret;
1826 }
1827
1828 void set_page_extent_mapped(struct page *page)
1829 {
1830         if (!PagePrivate(page)) {
1831                 SetPagePrivate(page);
1832                 page_cache_get(page);
1833                 set_page_private(page, EXTENT_PAGE_PRIVATE);
1834         }
1835 }
1836
1837 void set_page_extent_head(struct page *page, unsigned long len)
1838 {
1839         set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1840 }
1841
1842 /*
1843  * basic readpage implementation.  Locked extent state structs are inserted
1844  * into the tree; they are removed when the IO is done (by the end_io
1845  * handlers)
1846  */
1847 static int __extent_read_full_page(struct extent_io_tree *tree,
1848                                    struct page *page,
1849                                    get_extent_t *get_extent,
1850                                    struct bio **bio, int mirror_num)
1851 {
1852         struct inode *inode = page->mapping->host;
1853         u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1854         u64 page_end = start + PAGE_CACHE_SIZE - 1;
1855         u64 end;
1856         u64 cur = start;
1857         u64 extent_offset;
1858         u64 last_byte = i_size_read(inode);
1859         u64 block_start;
1860         u64 cur_end;
1861         sector_t sector;
1862         struct extent_map *em;
1863         struct block_device *bdev;
1864         int ret;
1865         int nr = 0;
1866         size_t page_offset = 0;
1867         size_t iosize;
1868         size_t blocksize = inode->i_sb->s_blocksize;
1869
1870         set_page_extent_mapped(page);
1871
1872         end = page_end;
1873         lock_extent(tree, start, end, GFP_NOFS);
1874
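        /*
         * walk the page one block at a time: zero anything past EOF or inside
         * a hole, skip ranges the get_extent callback already filled in
         * (EXTENT_UPTODATE), and submit read bios for the rest.
         */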
1875         while (cur <= end) {
1876                 if (cur >= last_byte) {
1877                         char *userpage;
1878                         iosize = PAGE_CACHE_SIZE - page_offset;
1879                         userpage = kmap_atomic(page, KM_USER0);
1880                         memset(userpage + page_offset, 0, iosize);
1881                         flush_dcache_page(page);
1882                         kunmap_atomic(userpage, KM_USER0);
1883                         set_extent_uptodate(tree, cur, cur + iosize - 1,
1884                                             GFP_NOFS);
1885                         unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1886                         break;
1887                 }
1888                 em = get_extent(inode, page, page_offset, cur,
1889                                 end - cur + 1, 0);
1890                 if (IS_ERR(em) || !em) {
1891                         SetPageError(page);
1892                         unlock_extent(tree, cur, end, GFP_NOFS);
1893                         break;
1894                 }
1895                 extent_offset = cur - em->start;
1896                 if (extent_map_end(em) <= cur) {
1897                         printk(KERN_ERR "bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur);
1898                 }
1899                 BUG_ON(extent_map_end(em) <= cur);
1900                 if (end < cur) {
1901                         printk(KERN_ERR "2bad mapping end %Lu cur %Lu\n", end, cur);
1902                 }
1903                 BUG_ON(end < cur);
1904
1905                 iosize = min(extent_map_end(em) - cur, end - cur + 1);
1906                 cur_end = min(extent_map_end(em) - 1, end);
1907                 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1908                 sector = (em->block_start + extent_offset) >> 9;
1909                 bdev = em->bdev;
1910                 block_start = em->block_start;
1911                 free_extent_map(em);
1912                 em = NULL;
1913
1914                 /* we've found a hole, just zero and go on */
1915                 if (block_start == EXTENT_MAP_HOLE) {
1916                         char *userpage;
1917                         userpage = kmap_atomic(page, KM_USER0);
1918                         memset(userpage + page_offset, 0, iosize);
1919                         flush_dcache_page(page);
1920                         kunmap_atomic(userpage, KM_USER0);
1921
1922                         set_extent_uptodate(tree, cur, cur + iosize - 1,
1923                                             GFP_NOFS);
1924                         unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1925                         cur = cur + iosize;
1926                         page_offset += iosize;
1927                         continue;
1928                 }
1929                 /* the get_extent function already copied into the page */
1930                 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
1931                         unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1932                         cur = cur + iosize;
1933                         page_offset += iosize;
1934                         continue;
1935                 }
1936                 /* we have an inline extent but it didn't get marked up
1937                  * to date.  Error out
1938                  */
1939                 if (block_start == EXTENT_MAP_INLINE) {
1940                         SetPageError(page);
1941                         unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1942                         cur = cur + iosize;
1943                         page_offset += iosize;
1944                         continue;
1945                 }
1946
1947                 ret = 0;
1948                 if (tree->ops && tree->ops->readpage_io_hook) {
1949                         ret = tree->ops->readpage_io_hook(page, cur,
1950                                                           cur + iosize - 1);
1951                 }
1952                 if (!ret) {
1953                         unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
1954                         pnr -= page->index;
1955                         ret = submit_extent_page(READ, tree, page,
1956                                          sector, iosize, page_offset,
1957                                          bdev, bio, pnr,
1958                                          end_bio_extent_readpage, mirror_num);
1959                         nr++;
1960                 }
1961                 if (ret)
1962                         SetPageError(page);
1963                 cur = cur + iosize;
1964                 page_offset += iosize;
1965         }
1966         if (!nr) {
1967                 if (!PageError(page))
1968                         SetPageUptodate(page);
1969                 unlock_page(page);
1970         }
1971         return 0;
1972 }
1973
1974 int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
1975                             get_extent_t *get_extent)
1976 {
1977         struct bio *bio = NULL;
1978         int ret;
1979
1980         ret = __extent_read_full_page(tree, page, get_extent, &bio, 0);
1981         if (bio)
1982                 submit_one_bio(READ, bio, 0);
1983         return ret;
1984 }
1985 EXPORT_SYMBOL(extent_read_full_page);
1986
1987 /*
1988  * the writepage semantics are similar to regular writepage.  extent
1989  * records are inserted to lock ranges in the tree, and as dirty areas
1990  * are found, they are marked writeback.  Then the lock bits are removed
1991  * and the end_io handler clears the writeback ranges
1992  */
1993 static int __extent_writepage(struct page *page, struct writeback_control *wbc,
1994                               void *data)
1995 {
1996         struct inode *inode = page->mapping->host;
1997         struct extent_page_data *epd = data;
1998         struct extent_io_tree *tree = epd->tree;
1999         u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2000         u64 delalloc_start;
2001         u64 page_end = start + PAGE_CACHE_SIZE - 1;
2002         u64 end;
2003         u64 cur = start;
2004         u64 extent_offset;
2005         u64 last_byte = i_size_read(inode);
2006         u64 block_start;
2007         u64 iosize;
2008         u64 unlock_start;
2009         sector_t sector;
2010         struct extent_map *em;
2011         struct block_device *bdev;
2012         int ret;
2013         int nr = 0;
2014         size_t pg_offset = 0;
2015         size_t blocksize;
2016         loff_t i_size = i_size_read(inode);
2017         unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
2018         u64 nr_delalloc;
2019         u64 delalloc_end;
2020
2021         WARN_ON(!PageLocked(page));
2022         pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
2023         if (page->index > end_index ||
2024            (page->index == end_index && !pg_offset)) {
2025                 page->mapping->a_ops->invalidatepage(page, 0);
2026                 unlock_page(page);
2027                 return 0;
2028         }
2029
2030         if (page->index == end_index) {
2031                 char *userpage;
2032
2033                 userpage = kmap_atomic(page, KM_USER0);
2034                 memset(userpage + pg_offset, 0,
2035                        PAGE_CACHE_SIZE - pg_offset);
2036                 kunmap_atomic(userpage, KM_USER0);
2037                 flush_dcache_page(page);
2038         }
2039         pg_offset = 0;
2040
2041         set_page_extent_mapped(page);
2042
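        /*
         * find any delayed allocation ranges touching this page, have the
         * fill_delalloc hook allocate real extents for them, and then drop
         * the LOCKED and DELALLOC bits so the ranges can be written below.
         */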
2043         delalloc_start = start;
2044         delalloc_end = 0;
2045         while (delalloc_end < page_end) {
2046                 nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
2047                                                        &delalloc_end,
2048                                                        128 * 1024 * 1024);
2049                 if (nr_delalloc == 0) {
2050                         delalloc_start = delalloc_end + 1;
2051                         continue;
2052                 }
2053                 tree->ops->fill_delalloc(inode, delalloc_start,
2054                                          delalloc_end);
2055                 clear_extent_bit(tree, delalloc_start,
2056                                  delalloc_end,
2057                                  EXTENT_LOCKED | EXTENT_DELALLOC,
2058                                  1, 0, GFP_NOFS);
2059                 delalloc_start = delalloc_end + 1;
2060         }
2061         lock_extent(tree, start, page_end, GFP_NOFS);
2062         unlock_start = start;
2063
2064         if (tree->ops && tree->ops->writepage_start_hook) {
2065                 ret = tree->ops->writepage_start_hook(page, start, page_end);
2066                 if (ret == -EAGAIN) {
2067                         unlock_extent(tree, start, page_end, GFP_NOFS);
2068                         redirty_page_for_writepage(wbc, page);
2069                         unlock_page(page);
2070                         return 0;
2071                 }
2072         }
2073
2074         end = page_end;
2075         if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
2076                 printk(KERN_ERR "found delalloc bits after lock_extent\n");
2077         }
2078
2079         if (last_byte <= start) {
2080                 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
2081                 unlock_extent(tree, start, page_end, GFP_NOFS);
2082                 if (tree->ops && tree->ops->writepage_end_io_hook)
2083                         tree->ops->writepage_end_io_hook(page, start,
2084                                                          page_end, NULL, 1);
2085                 unlock_start = page_end + 1;
2086                 goto done;
2087         }
2088
2089         set_extent_uptodate(tree, start, page_end, GFP_NOFS);
2090         blocksize = inode->i_sb->s_blocksize;
2091
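        /*
         * write the page one block at a time: holes and inline extents are
         * reported as done immediately via the writepage_end_io hook,
         * anything else is marked writeback and sent down in write bios.
         */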
2092         while (cur <= end) {
2093                 if (cur >= last_byte) {
2094                         clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
2095                         unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2096                         if (tree->ops && tree->ops->writepage_end_io_hook)
2097                                 tree->ops->writepage_end_io_hook(page, cur,
2098                                                          page_end, NULL, 1);
2099                         unlock_start = page_end + 1;
2100                         break;
2101                 }
2102                 em = epd->get_extent(inode, page, pg_offset, cur,
2103                                      end - cur + 1, 1);
2104                 if (IS_ERR(em) || !em) {
2105                         SetPageError(page);
2106                         break;
2107                 }
2108
2109                 extent_offset = cur - em->start;
2110                 BUG_ON(extent_map_end(em) <= cur);
2111                 BUG_ON(end < cur);
2112                 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2113                 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
2114                 sector = (em->block_start + extent_offset) >> 9;
2115                 bdev = em->bdev;
2116                 block_start = em->block_start;
2117                 free_extent_map(em);
2118                 em = NULL;
2119
2120                 if (block_start == EXTENT_MAP_HOLE ||
2121                     block_start == EXTENT_MAP_INLINE) {
2122                         clear_extent_dirty(tree, cur,
2123                                            cur + iosize - 1, GFP_NOFS);
2124
2125                         unlock_extent(tree, unlock_start, cur + iosize - 1,
2126                                       GFP_NOFS);
2127
2128                         if (tree->ops && tree->ops->writepage_end_io_hook)
2129                                 tree->ops->writepage_end_io_hook(page, cur,
2130                                                          cur + iosize - 1,
2131                                                          NULL, 1);
2132                         cur = cur + iosize;
2133                         pg_offset += iosize;
2134                         unlock_start = cur;
2135                         continue;
2136                 }
2137
2138                 /* leave this out until we have a page_mkwrite call */
2139                 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
2140                                    EXTENT_DIRTY, 0)) {
2141                         cur = cur + iosize;
2142                         pg_offset += iosize;
2143                         continue;
2144                 }
2145                 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
2146                 if (tree->ops && tree->ops->writepage_io_hook) {
2147                         ret = tree->ops->writepage_io_hook(page, cur,
2148                                                 cur + iosize - 1);
2149                 } else {
2150                         ret = 0;
2151                 }
2152                 if (ret) {
2153                         SetPageError(page);
2154                 } else {
2155                         unsigned long max_nr = end_index + 1;
2156
2157                         set_range_writeback(tree, cur, cur + iosize - 1);
2158                         if (!PageWriteback(page)) {
2159                                 printk(KERN_ERR "warning page %lu not "
2160                                        "writeback, cur %llu end %llu\n",
2161                                        page->index, (unsigned long long)cur,
2162                                        (unsigned long long)end);
2163                         }
2164
2165                         ret = submit_extent_page(WRITE, tree, page, sector,
2166                                                  iosize, pg_offset, bdev,
2167                                                  &epd->bio, max_nr,
2168                                                  end_bio_extent_writepage, 0);
2169                         if (ret)
2170                                 SetPageError(page);
2171                 }
2172                 cur = cur + iosize;
2173                 pg_offset += iosize;
2174                 nr++;
2175         }
2176 done:
2177         if (nr == 0) {
2178                 /* make sure the mapping tag for page dirty gets cleared */
2179                 set_page_writeback(page);
2180                 end_page_writeback(page);
2181         }
2182         if (unlock_start <= page_end)
2183                 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2184         unlock_page(page);
2185         return 0;
2186 }
2187
2188 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
2189 /* Taken directly from 2.6.23 for 2.6.18 back port */
2190 typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
2191                                 void *data);
2192
2193 /**
2194  * write_cache_pages - walk the list of dirty pages of the given address space
2195  * and write all of them.
2196  * @mapping: address space structure to write
2197  * @wbc: subtract the number of written pages from *@wbc->nr_to_write
2198  * @writepage: function called for each page
2199  * @data: data passed to writepage function
2200  *
2201  * If a page is already under I/O, write_cache_pages() skips it, even
2202  * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
2203  * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
2204  * and msync() need to guarantee that all the data which was dirty at the time
2205  * the call was made get new I/O started against them.  If wbc->sync_mode is
2206  * WB_SYNC_ALL then we were called for data integrity and we must wait for
2207  * existing IO to complete.
2208  */
2209 static int write_cache_pages(struct address_space *mapping,
2210                       struct writeback_control *wbc, writepage_t writepage,
2211                       void *data)
2212 {
2213         struct backing_dev_info *bdi = mapping->backing_dev_info;
2214         int ret = 0;
2215         int done = 0;
2216         struct pagevec pvec;
2217         int nr_pages;
2218         pgoff_t index;
2219         pgoff_t end;            /* Inclusive */
2220         int scanned = 0;
2221         int range_whole = 0;
2222
2223         if (wbc->nonblocking && bdi_write_congested(bdi)) {
2224                 wbc->encountered_congestion = 1;
2225                 return 0;
2226         }
2227
2228         pagevec_init(&pvec, 0);
2229         if (wbc->range_cyclic) {
2230                 index = mapping->writeback_index; /* Start from prev offset */
2231                 end = -1;
2232         } else {
2233                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2234                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2235                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2236                         range_whole = 1;
2237                 scanned = 1;
2238         }
2239 retry:
2240         while (!done && (index <= end) &&
2241                (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
2242                                               PAGECACHE_TAG_DIRTY,
2243                                               min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
2244                 unsigned i;
2245
2246                 scanned = 1;
2247                 for (i = 0; i < nr_pages; i++) {
2248                         struct page *page = pvec.pages[i];
2249
2250                         /*
2251                          * At this point we hold neither mapping->tree_lock nor
2252                          * lock on the page itself: the page may be truncated or
2253                          * invalidated (changing page->mapping to NULL), or even
2254                          * swizzled back from swapper_space to tmpfs file
2255                          * mapping
2256                          */
2257                         lock_page(page);
2258
2259                         if (unlikely(page->mapping != mapping)) {
2260                                 unlock_page(page);
2261                                 continue;
2262                         }
2263
2264                         if (!wbc->range_cyclic && page->index > end) {
2265                                 done = 1;
2266                                 unlock_page(page);
2267                                 continue;
2268                         }
2269
2270                         if (wbc->sync_mode != WB_SYNC_NONE)
2271                                 wait_on_page_writeback(page);
2272
2273                         if (PageWriteback(page) ||
2274                             !clear_page_dirty_for_io(page)) {
2275                                 unlock_page(page);
2276                                 continue;
2277                         }
2278
2279                         ret = (*writepage)(page, wbc, data);
2280
2281                         if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
2282                                 unlock_page(page);
2283                                 ret = 0;
2284                         }
2285                         if (ret || (--(wbc->nr_to_write) <= 0))
2286                                 done = 1;
2287                         if (wbc->nonblocking && bdi_write_congested(bdi)) {
2288                                 wbc->encountered_congestion = 1;
2289                                 done = 1;
2290                         }
2291                 }
2292                 pagevec_release(&pvec);
2293                 cond_resched();
2294         }
2295         if (!scanned && !done) {
2296                 /*
2297                  * We hit the last page and there is more work to be done: wrap
2298                  * back to the start of the file
2299                  */
2300                 scanned = 1;
2301                 index = 0;
2302                 goto retry;
2303         }
2304         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2305                 mapping->writeback_index = index;
2306         return ret;
2307 }
2308 #endif
2309
2310 int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2311                           get_extent_t *get_extent,
2312                           struct writeback_control *wbc)
2313 {
2314         int ret;
2315         struct address_space *mapping = page->mapping;
2316         struct extent_page_data epd = {
2317                 .bio = NULL,
2318                 .tree = tree,
2319                 .get_extent = get_extent,
2320         };
2321         struct writeback_control wbc_writepages = {
2322                 .bdi            = wbc->bdi,
2323                 .sync_mode      = WB_SYNC_NONE,
2324                 .older_than_this = NULL,
2325                 .nr_to_write    = 64,
2326                 .range_start    = page_offset(page) + PAGE_CACHE_SIZE,
2327                 .range_end      = (loff_t)-1,
2328         };
2329
2330
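        /*
         * write the page that was asked for first, then use the private
         * writeback_control above to opportunistically push up to 64 more
         * dirty pages that come after it in the file.
         */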
2331         ret = __extent_writepage(page, wbc, &epd);
2332
2333         write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
2334         if (epd.bio) {
2335                 submit_one_bio(WRITE, epd.bio, 0);
2336         }
2337         return ret;
2338 }
2339 EXPORT_SYMBOL(extent_write_full_page);
2340
2341
2342 int extent_writepages(struct extent_io_tree *tree,
2343                       struct address_space *mapping,
2344                       get_extent_t *get_extent,
2345                       struct writeback_control *wbc)
2346 {
2347         int ret = 0;
2348         struct extent_page_data epd = {
2349                 .bio = NULL,
2350                 .tree = tree,
2351                 .get_extent = get_extent,
2352         };
2353
2354         ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
2355         if (epd.bio) {
2356                 submit_one_bio(WRITE, epd.bio, 0);
2357         }
2358         return ret;
2359 }
2360 EXPORT_SYMBOL(extent_writepages);
2361
2362 int extent_readpages(struct extent_io_tree *tree,
2363                      struct address_space *mapping,
2364                      struct list_head *pages, unsigned nr_pages,
2365                      get_extent_t get_extent)
2366 {
2367         struct bio *bio = NULL;
2368         unsigned page_idx;
2369         struct pagevec pvec;
2370
2371         pagevec_init(&pvec, 0);
2372         for (page_idx = 0; page_idx < nr_pages; page_idx++) {
2373                 struct page *page = list_entry(pages->prev, struct page, lru);
2374
2375                 prefetchw(&page->flags);
2376                 list_del(&page->lru);
2377                 /*
2378                  * what we want to do here is call add_to_page_cache_lru,
2379                  * but that isn't exported, so we reproduce it here
2380                  */
2381                 if (!add_to_page_cache(page, mapping,
2382                                         page->index, GFP_KERNEL)) {
2383
2384                         /* open coding of lru_cache_add, also not exported */
2385                         page_cache_get(page);
2386                         if (!pagevec_add(&pvec, page))
2387                                 __pagevec_lru_add(&pvec);
2388                         __extent_read_full_page(tree, page, get_extent,
2389                                                 &bio, 0);
2390                 }
2391                 page_cache_release(page);
2392         }
2393         if (pagevec_count(&pvec))
2394                 __pagevec_lru_add(&pvec);
2395         BUG_ON(!list_empty(pages));
2396         if (bio)
2397                 submit_one_bio(READ, bio, 0);
2398         return 0;
2399 }
2400 EXPORT_SYMBOL(extent_readpages);
2401
2402 /*
2403  * basic invalidatepage code, this waits on any locked or writeback
2404  * ranges corresponding to the page, and then deletes any extent state
2405  * records from the tree
2406  */
2407 int extent_invalidatepage(struct extent_io_tree *tree,
2408                           struct page *page, unsigned long offset)
2409 {
2410         u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
2411         u64 end = start + PAGE_CACHE_SIZE - 1;
2412         size_t blocksize = page->mapping->host->i_sb->s_blocksize;
2413
2414         start += (offset + blocksize - 1) & ~(blocksize - 1);
2415         if (start > end)
2416                 return 0;
2417
2418         lock_extent(tree, start, end, GFP_NOFS);
2419         wait_on_extent_writeback(tree, start, end);
2420         clear_extent_bit(tree, start, end,
2421                          EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
2422                          1, 1, GFP_NOFS);
2423         return 0;
2424 }
2425 EXPORT_SYMBOL(extent_invalidatepage);
2426
2427 /*
2428  * simple commit_write call; the page is marked dirty and the inode
2429  * size is updated when the write extends past the old i_size
2430  */
2431 int extent_commit_write(struct extent_io_tree *tree,
2432                         struct inode *inode, struct page *page,
2433                         unsigned from, unsigned to)
2434 {
2435         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2436
2437         set_page_extent_mapped(page);
2438         set_page_dirty(page);
2439
2440         if (pos > inode->i_size) {
2441                 i_size_write(inode, pos);
2442                 mark_inode_dirty(inode);
2443         }
2444         return 0;
2445 }
2446 EXPORT_SYMBOL(extent_commit_write);
2447
2448 int extent_prepare_write(struct extent_io_tree *tree,
2449                          struct inode *inode, struct page *page,
2450                          unsigned from, unsigned to, get_extent_t *get_extent)
2451 {
2452         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2453         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
2454         u64 block_start;
2455         u64 orig_block_start;
2456         u64 block_end;
2457         u64 cur_end;
2458         struct extent_map *em;
2459         unsigned blocksize = 1 << inode->i_blkbits;
2460         size_t page_offset = 0;
2461         size_t block_off_start;
2462         size_t block_off_end;
2463         int err = 0;
2464         int iocount = 0;
2465         int ret = 0;
2466         int isnew;
2467
2468         set_page_extent_mapped(page);
2469
2470         block_start = (page_start + from) & ~((u64)blocksize - 1);
2471         block_end = (page_start + to - 1) | (blocksize - 1);
2472         orig_block_start = block_start;
2473
2474         lock_extent(tree, page_start, page_end, GFP_NOFS);
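        /*
         * for each block under [from, to): newly allocated blocks that are
         * only partially covered by the write get their uncovered bytes
         * zeroed; existing blocks that aren't uptodate and are only partially
         * written are read in first (the sub-range is locked so
         * end_bio_extent_preparewrite can unlock it); everything else is
         * simply marked uptodate.
         */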
2475         while (block_start <= block_end) {
2476                 em = get_extent(inode, page, page_offset, block_start,
2477                                 block_end - block_start + 1, 1);
2478                 if (IS_ERR(em) || !em) {
2479                         goto err;
2480                 }
2481                 cur_end = min(block_end, extent_map_end(em) - 1);
2482                 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
2483                 block_off_end = block_off_start + blocksize;
2484                 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
2485
2486                 if (!PageUptodate(page) && isnew &&
2487                     (block_off_end > to || block_off_start < from)) {
2488                         void *kaddr;
2489
2490                         kaddr = kmap_atomic(page, KM_USER0);
2491                         if (block_off_end > to)
2492                                 memset(kaddr + to, 0, block_off_end - to);
2493                         if (block_off_start < from)
2494                                 memset(kaddr + block_off_start, 0,
2495                                        from - block_off_start);
2496                         flush_dcache_page(page);
2497                         kunmap_atomic(kaddr, KM_USER0);
2498                 }
2499                 if ((em->block_start != EXTENT_MAP_HOLE &&
2500                      em->block_start != EXTENT_MAP_INLINE) &&
2501                     !isnew && !PageUptodate(page) &&
2502                     (block_off_end > to || block_off_start < from) &&
2503                     !test_range_bit(tree, block_start, cur_end,
2504                                     EXTENT_UPTODATE, 1)) {
2505                         u64 sector;
2506                         u64 extent_offset = block_start - em->start;
2507                         size_t iosize;
2508                         sector = (em->block_start + extent_offset) >> 9;
2509                         iosize = (cur_end - block_start + blocksize) &
2510                                 ~((u64)blocksize - 1);
2511                         /*
2512                          * we've already got the extent locked, but we
2513                          * need to split the state such that our end_bio
2514                          * handler can clear the lock.
2515                          */
2516                         set_extent_bit(tree, block_start,
2517                                        block_start + iosize - 1,
2518                                        EXTENT_LOCKED, 0, NULL, GFP_NOFS);
2519                         ret = submit_extent_page(READ, tree, page,
2520                                          sector, iosize, page_offset, em->bdev,
2521                                          NULL, 1,
2522                                          end_bio_extent_preparewrite, 0);
2523                         iocount++;
2524                         block_start = block_start + iosize;
2525                 } else {
2526                         set_extent_uptodate(tree, block_start, cur_end,
2527                                             GFP_NOFS);
2528                         unlock_extent(tree, block_start, cur_end, GFP_NOFS);
2529                         block_start = cur_end + 1;
2530                 }
2531                 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
2532                 free_extent_map(em);
2533         }
2534         if (iocount) {
2535                 wait_extent_bit(tree, orig_block_start,
2536                                 block_end, EXTENT_LOCKED);
2537         }
2538         check_page_uptodate(tree, page);
2539 err:
2540         /* FIXME, zero out newly allocated blocks on error */
2541         return err;
2542 }
2543 EXPORT_SYMBOL(extent_prepare_write);
2544
2545 /*
2546  * a helper for releasepage, this tests for areas of the page that
2547  * are locked or under IO and drops the related state bits if it is safe
2548  * to drop the page.
2549  */
2550 int try_release_extent_state(struct extent_map_tree *map,
2551                              struct extent_io_tree *tree, struct page *page,
2552                              gfp_t mask)
2553 {
2554         u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2555         u64 end = start + PAGE_CACHE_SIZE - 1;
2556         int ret = 1;
2557
2558         if (test_range_bit(tree, start, end,
2559                            EXTENT_IOBITS | EXTENT_ORDERED, 0))
2560                 ret = 0;
2561         else {
2562                 if ((mask & GFP_NOFS) == GFP_NOFS)
2563                         mask = GFP_NOFS;
2564                 clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
2565                                  1, 1, mask);
2566         }
2567         return ret;
2568 }
2569 EXPORT_SYMBOL(try_release_extent_state);
2570
2571 /*
2572  * a helper for releasepage.  As long as there are no locked extents
2573  * in the range corresponding to the page, both state records and extent
2574  * map records are removed
2575  */
2576 int try_release_extent_mapping(struct extent_map_tree *map,
2577                                struct extent_io_tree *tree, struct page *page,
2578                                gfp_t mask)
2579 {
2580         struct extent_map *em;
2581         u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2582         u64 end = start + PAGE_CACHE_SIZE - 1;
2583
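        /*
         * for larger files (and callers that may block), also drop the cached
         * extent_map records for this page, as long as they aren't pinned and
         * no part of their range is still locked in the io tree.
         */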
2584         if ((mask & __GFP_WAIT) &&
2585             page->mapping->host->i_size > 16 * 1024 * 1024) {
2586                 u64 len;
2587                 while (start <= end) {
2588                         len = end - start + 1;
2589                         spin_lock(&map->lock);
2590                         em = lookup_extent_mapping(map, start, len);
2591                         if (!em || IS_ERR(em)) {
2592                                 spin_unlock(&map->lock);
2593                                 break;
2594                         }
2595                         if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
2596                             em->start != start) {
2597                                 spin_unlock(&map->lock);
2598                                 free_extent_map(em);
2599                                 break;
2600                         }
2601                         if (!test_range_bit(tree, em->start,
2602                                             extent_map_end(em) - 1,
2603                                             EXTENT_LOCKED, 0)) {
2604                                 remove_extent_mapping(map, em);
2605                                 /* once for the rb tree */
2606                                 free_extent_map(em);
2607                         }
2608                         start = extent_map_end(em);
2609                         spin_unlock(&map->lock);
2610
2611                         /* once for us */
2612                         free_extent_map(em);
2613                 }
2614         }
2615         return try_release_extent_state(map, tree, page, mask);
2616 }
2617 EXPORT_SYMBOL(try_release_extent_mapping);
2618
2619 sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
2620                 get_extent_t *get_extent)
2621 {
2622         struct inode *inode = mapping->host;
2623         u64 start = iblock << inode->i_blkbits;
2624         sector_t sector = 0;
2625         struct extent_map *em;
2626
2627         em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0);
2628         if (!em || IS_ERR(em))
2629                 return 0;
2630
2631         if (em->block_start == EXTENT_MAP_INLINE ||
2632             em->block_start == EXTENT_MAP_HOLE)
2633                 goto out;
2634
2635         sector = (em->block_start + start - em->start) >> inode->i_blkbits;
2636 out:
2637         free_extent_map(em);
2638         return sector;
2639 }
2640
2641 static inline struct page *extent_buffer_page(struct extent_buffer *eb,
2642                                               unsigned long i)
2643 {
2644         struct page *p;
2645         struct address_space *mapping;
2646
2647         if (i == 0)
2648                 return eb->first_page;
2649         i += eb->start >> PAGE_CACHE_SHIFT;
2650         mapping = eb->first_page->mapping;
2651         if (!mapping)
2652                 return NULL;
2653
2654         /*
2655          * extent_buffer_page is only called after pinning the page
2656          * by increasing the reference count.  So we know the page must
2657          * be in the radix tree.
2658          */
2659 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
2660         rcu_read_lock();
2661 #else
2662         read_lock_irq(&mapping->tree_lock);
2663 #endif
2664         p = radix_tree_lookup(&mapping->page_tree, i);
2665
2666 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
2667         rcu_read_unlock();
2668 #else
2669         read_unlock_irq(&mapping->tree_lock);
2670 #endif
2671         return p;
2672 }
2673
2674 static inline unsigned long num_extent_pages(u64 start, u64 len)
2675 {
2676         return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
2677                 (start >> PAGE_CACHE_SHIFT);
2678 }
2679
2680 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
2681                                                    u64 start,
2682                                                    unsigned long len,
2683                                                    gfp_t mask)
2684 {
2685         struct extent_buffer *eb = NULL;
2686         unsigned long flags;
2687
2688         eb = kmem_cache_zalloc(extent_buffer_cache, mask);
             if (!eb)
                     return NULL;
2689         eb->start = start;
2690         eb->len = len;
2691         mutex_init(&eb->mutex);
2692         spin_lock_irqsave(&leak_lock, flags);
2693         list_add(&eb->leak_list, &buffers);
2694         spin_unlock_irqrestore(&leak_lock, flags);
2695         atomic_set(&eb->refs, 1);
2696
2697         return eb;
2698 }
2699
2700 static void __free_extent_buffer(struct extent_buffer *eb)
2701 {
2702         unsigned long flags;
2703         spin_lock_irqsave(&leak_lock, flags);
2704         list_del(&eb->leak_list);
2705         spin_unlock_irqrestore(&leak_lock, flags);
2706         kmem_cache_free(extent_buffer_cache, eb);
2707 }
2708
2709 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
2710                                           u64 start, unsigned long len,
2711                                           struct page *page0,
2712                                           gfp_t mask)
2713 {
2714         unsigned long num_pages = num_extent_pages(start, len);
2715         unsigned long i;
2716         unsigned long index = start >> PAGE_CACHE_SHIFT;
2717         struct extent_buffer *eb;
2718         struct extent_buffer *exists = NULL;
2719         struct page *p;
2720         struct address_space *mapping = tree->mapping;
2721         int uptodate = 1;
2722
2723         spin_lock(&tree->buffer_lock);
2724         eb = buffer_search(tree, start);
2725         if (eb) {
2726                 atomic_inc(&eb->refs);
2727                 spin_unlock(&tree->buffer_lock);
2728                 return eb;
2729         }
2730         spin_unlock(&tree->buffer_lock);
2731
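        /*
         * not in the buffer tree yet: allocate a new extent_buffer, grab and
         * pin its pages, then race to insert it.  If another thread inserted
         * one first, ours is freed and the existing buffer is returned.
         */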
2732         eb = __alloc_extent_buffer(tree, start, len, mask);
2733         if (!eb)
2734                 return NULL;
2735
2736         if (page0) {
2737                 eb->first_page = page0;
2738                 i = 1;
2739                 index++;
2740                 page_cache_get(page0);
2741                 mark_page_accessed(page0);
2742                 set_page_extent_mapped(page0);
2743                 set_page_extent_head(page0, len);
2744                 uptodate = PageUptodate(page0);
2745         } else {
2746                 i = 0;
2747         }
2748         for (; i < num_pages; i++, index++) {
2749                 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
2750                 if (!p) {
2751                         WARN_ON(1);
2752                         goto free_eb;
2753                 }
2754                 set_page_extent_mapped(p);
2755                 mark_page_accessed(p);
2756                 if (i == 0) {
2757                         eb->first_page = p;
2758                         set_page_extent_head(p, len);
2759                 } else {
2760                         set_page_private(p, EXTENT_PAGE_PRIVATE);
2761                 }
2762                 if (!PageUptodate(p))
2763                         uptodate = 0;
2764                 unlock_page(p);
2765         }
2766         if (uptodate)
2767                 eb->flags |= EXTENT_UPTODATE;
2768         eb->flags |= EXTENT_BUFFER_FILLED;
2769
2770         spin_lock(&tree->buffer_lock);
2771         exists = buffer_tree_insert(tree, start, &eb->rb_node);
2772         if (exists) {
2773                 /* add one reference for the caller */
2774                 atomic_inc(&exists->refs);
2775                 spin_unlock(&tree->buffer_lock);
2776                 goto free_eb;
2777         }
2778         spin_unlock(&tree->buffer_lock);
2779
2780         /* add one reference for the tree */
2781         atomic_inc(&eb->refs);
2782         return eb;
2783
2784 free_eb:
2785         if (!atomic_dec_and_test(&eb->refs))
2786                 return exists;
2787         for (index = 1; index < i; index++)
2788                 page_cache_release(extent_buffer_page(eb, index));
2789         page_cache_release(extent_buffer_page(eb, 0));
2790         __free_extent_buffer(eb);
2791         return exists;
2792 }
2793 EXPORT_SYMBOL(alloc_extent_buffer);
2794
2795 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
2796                                          u64 start, unsigned long len,
2797                                           gfp_t mask)
2798 {
2799         struct extent_buffer *eb;
2800
2801         spin_lock(&tree->buffer_lock);
2802         eb = buffer_search(tree, start);
2803         if (eb)
2804                 atomic_inc(&eb->refs);
2805         spin_unlock(&tree->buffer_lock);
2806
2807         return eb;
2808 }
2809 EXPORT_SYMBOL(find_extent_buffer);
2810
2811 void free_extent_buffer(struct extent_buffer *eb)
2812 {
2813         if (!eb)
2814                 return;
2815
2816         if (!atomic_dec_and_test(&eb->refs))
2817                 return;
2818
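        /*
         * the buffer tree holds its own reference, so the count should never
         * hit zero through this path; warn if it does.
         */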
2819         WARN_ON(1);
2820 }
2821 EXPORT_SYMBOL(free_extent_buffer);
2822
2823 int clear_extent_buffer_dirty(struct extent_io_tree *tree,
2824                               struct extent_buffer *eb)
2825 {
2826         int set;
2827         unsigned long i;
2828         unsigned long num_pages;
2829         struct page *page;
2830
2831         u64 start = eb->start;
2832         u64 end = start + eb->len - 1;
2833
2834         set = clear_extent_dirty(tree, start, end, GFP_NOFS);
2835         num_pages = num_extent_pages(eb->start, eb->len);
2836
2837         for (i = 0; i < num_pages; i++) {
2838                 page = extent_buffer_page(eb, i);
2839                 lock_page(page);
2840                 if (i == 0)
2841                         set_page_extent_head(page, eb->len);
2842                 else
2843                         set_page_private(page, EXTENT_PAGE_PRIVATE);
2844
2845                 /*
2846                  * if we're on the last page or the first page and the
2847                  * block isn't aligned on a page boundary, do extra checks
2848                  * to make sure we don't clean page that is partially dirty
2849                  */
2850                 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2851                     ((i == num_pages - 1) &&
2852                      ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
2853                         start = (u64)page->index << PAGE_CACHE_SHIFT;
2854                         end  = start + PAGE_CACHE_SIZE - 1;
2855                         if (test_range_bit(tree, start, end,
2856                                            EXTENT_DIRTY, 0)) {
2857                                 unlock_page(page);
2858                                 continue;
2859                         }
2860                 }
2861                 clear_page_dirty_for_io(page);
2862 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
2863                 spin_lock_irq(&page->mapping->tree_lock);
2864 #else
                /*
                 * radix_tree_tag_clear() changes the tree's tags, so on
                 * older kernels where tree_lock is still a rwlock we need
                 * the write side, not the read side.
                 */
2865                 write_lock_irq(&page->mapping->tree_lock);
2866 #endif
2867                 if (!PageDirty(page)) {
2868                         radix_tree_tag_clear(&page->mapping->page_tree,
2869                                                 page_index(page),
2870                                                 PAGECACHE_TAG_DIRTY);
2871                 }
2872 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
2873                 spin_unlock_irq(&page->mapping->tree_lock);
2874 #else
2875                 write_unlock_irq(&page->mapping->tree_lock);
2876 #endif
2877                 unlock_page(page);
2878         }
2879         return 0;
2880 }
2881 EXPORT_SYMBOL(clear_extent_buffer_dirty);
2882
2883 int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
2884                                     struct extent_buffer *eb)
2885 {
2886         return wait_on_extent_writeback(tree, eb->start,
2887                                         eb->start + eb->len - 1);
2888 }
2889 EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
2890
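/*
 * set_extent_buffer_dirty - mark a whole extent buffer dirty
 *
 * Marks every backing page dirty and then sets EXTENT_DIRTY on the buffer's
 * byte range in the io tree.  The first page is handled under the page lock
 * so page->private is set up as an extent head before writepage can look at
 * it.
 */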
2891 int set_extent_buffer_dirty(struct extent_io_tree *tree,
2892                              struct extent_buffer *eb)
2893 {
2894         unsigned long i;
2895         unsigned long num_pages;
2896
2897         num_pages = num_extent_pages(eb->start, eb->len);
2898         for (i = 0; i < num_pages; i++) {
2899                 struct page *page = extent_buffer_page(eb, i);
2900                 /* writepage may need to do something special for the
2901                  * first page, so we have to make sure page->private is
2902                  * properly set.  releasepage may drop page->private
2903                  * on us if the page isn't already dirty.
2904                  */
2905                 if (i == 0) {
2906                         lock_page(page);
2907                         set_page_extent_head(page, eb->len);
2908                 } else if (PagePrivate(page) &&
2909                            page->private != EXTENT_PAGE_PRIVATE) {
2910                         lock_page(page);
2911                         set_page_extent_mapped(page);
2912                         unlock_page(page);
2913                 }
2914                 __set_page_dirty_nobuffers(page);
2915                 if (i == 0)
2916                         unlock_page(page);
2917         }
2918         return set_extent_dirty(tree, eb->start,
2919                                 eb->start + eb->len - 1, GFP_NOFS);
2920 }
2921 EXPORT_SYMBOL(set_extent_buffer_dirty);
2922
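/*
 * clear_extent_buffer_uptodate - mark an extent buffer as not uptodate
 *
 * Clears the EXTENT_UPTODATE flag on the buffer, clears the uptodate bits
 * for its range in the io tree, and clears PG_uptodate on each backing page
 * that is present.
 */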
2923 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
2924                                 struct extent_buffer *eb)
2925 {
2926         unsigned long i;
2927         struct page *page;
2928         unsigned long num_pages;
2929
2930         num_pages = num_extent_pages(eb->start, eb->len);
2931         eb->flags &= ~EXTENT_UPTODATE;
2932
2933         clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
2934                               GFP_NOFS);
2935         for (i = 0; i < num_pages; i++) {
2936                 page = extent_buffer_page(eb, i);
2937                 if (page)
2938                         ClearPageUptodate(page);
2939         }
2940         return 0;
2941 }
2942
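/*
 * set_extent_buffer_uptodate - mark an extent buffer as uptodate
 *
 * Sets the uptodate bits for the buffer's range in the io tree and marks the
 * backing pages uptodate.  First and last pages that the buffer only
 * partially covers are handed to check_page_uptodate(), which should only
 * mark a page uptodate once every range in it is uptodate in the tree.
 */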
2943 int set_extent_buffer_uptodate(struct extent_io_tree *tree,
2944                                 struct extent_buffer *eb)
2945 {
2946         unsigned long i;
2947         struct page *page;
2948         unsigned long num_pages;
2949
2950         num_pages = num_extent_pages(eb->start, eb->len);
2951
2952         set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
2953                             GFP_NOFS);
2954         for (i = 0; i < num_pages; i++) {
2955                 page = extent_buffer_page(eb, i);
2956                 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2957                     ((i == num_pages - 1) &&
2958                      ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
2959                         check_page_uptodate(tree, page);
2960                         continue;
2961                 }
2962                 SetPageUptodate(page);
2963         }
2964         return 0;
2965 }
2966 EXPORT_SYMBOL(set_extent_buffer_uptodate);
2967
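/*
 * extent_range_uptodate - check whether [start, end] is fully uptodate
 *
 * Returns 1 if the range is covered by EXTENT_UPTODATE bits in the tree, or
 * if every page backing the range is individually uptodate; returns 0
 * otherwise.
 */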
2968 int extent_range_uptodate(struct extent_io_tree *tree,
2969                           u64 start, u64 end)
2970 {
2971         struct page *page;
2972         int ret;
2973         int pg_uptodate = 1;
2974         int uptodate;
2975         unsigned long index;
2976
2977         ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
2978         if (ret)
2979                 return 1;
2980         while (start <= end) {
2981                 index = start >> PAGE_CACHE_SHIFT;
2982                 page = find_get_page(tree->mapping, index);
                if (!page) {
                        /* no page in the cache means the range isn't uptodate */
                        pg_uptodate = 0;
                        break;
                }
2983                 uptodate = PageUptodate(page);
2984                 page_cache_release(page);
2985                 if (!uptodate) {
2986                         pg_uptodate = 0;
2987                         break;
2988                 }
2989                 start += PAGE_CACHE_SIZE;
2990         }
2991         return pg_uptodate;
2992 }
2993
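/*
 * extent_buffer_uptodate - check whether an extent buffer is fully uptodate
 *
 * Checks the cheap EXTENT_UPTODATE flag first, then the io tree bits, and
 * finally falls back to testing PG_uptodate on every backing page.
 */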
2994 int extent_buffer_uptodate(struct extent_io_tree *tree,
2995                            struct extent_buffer *eb)
2996 {
2997         int ret = 0;
2998         unsigned long num_pages;
2999         unsigned long i;
3000         struct page *page;
3001         int pg_uptodate = 1;
3002
3003         if (eb->flags & EXTENT_UPTODATE)
3004                 return 1;
3005
3006         ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3007                            EXTENT_UPTODATE, 1);
3008         if (ret)
3009                 return ret;
3010
3011         num_pages = num_extent_pages(eb->start, eb->len);
3012         for (i = 0; i < num_pages; i++) {
3013                 page = extent_buffer_page(eb, i);
3014                 if (!PageUptodate(page)) {
3015                         pg_uptodate = 0;
3016                         break;
3017                 }
3018         }
3019         return pg_uptodate;
3020 }
3021 EXPORT_SYMBOL(extent_buffer_uptodate);
3022
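/*
 * read_extent_buffer_pages - read an extent buffer's pages from disk
 *
 * Locks the pages from the one containing @start onwards and submits reads
 * for any that are not uptodate.  With @wait set, the call blocks until the
 * IO completes and returns -EIO if any page failed; without @wait it uses
 * trylock and backs out (returning 0) if any page is already locked.
 *
 * Rough usage sketch (not taken from this file; names are illustrative):
 *
 *        err = read_extent_buffer_pages(tree, eb, 0, 1, get_extent, 0);
 *        if (!err)
 *                read_extent_buffer(eb, buf, 0, eb->len);
 */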
3023 int read_extent_buffer_pages(struct extent_io_tree *tree,
3024                              struct extent_buffer *eb,
3025                              u64 start, int wait,
3026                              get_extent_t *get_extent, int mirror_num)
3027 {
3028         unsigned long i;
3029         unsigned long start_i;
3030         struct page *page;
3031         int err;
3032         int ret = 0;
3033         int locked_pages = 0;
3034         int all_uptodate = 1;
3035         int inc_all_pages = 0;
3036         unsigned long num_pages;
3037         struct bio *bio = NULL;
3038
3039         if (eb->flags & EXTENT_UPTODATE)
3040                 return 0;
3041
3042         if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3043                            EXTENT_UPTODATE, 1)) {
3044                 return 0;
3045         }
3046
3047         if (start) {
3048                 WARN_ON(start < eb->start);
3049                 start_i = (start >> PAGE_CACHE_SHIFT) -
3050                         (eb->start >> PAGE_CACHE_SHIFT);
3051         } else {
3052                 start_i = 0;
3053         }
3054
3055         num_pages = num_extent_pages(eb->start, eb->len);
3056         for (i = start_i; i < num_pages; i++) {
3057                 page = extent_buffer_page(eb, i);
3058                 if (!wait) {
3059                         if (!trylock_page(page))
3060                                 goto unlock_exit;
3061                 } else {
3062                         lock_page(page);
3063                 }
3064                 locked_pages++;
3065                 if (!PageUptodate(page)) {
3066                         all_uptodate = 0;
3067                 }
3068         }
3069         if (all_uptodate) {
3070                 if (start_i == 0)
3071                         eb->flags |= EXTENT_UPTODATE;
3072                 goto unlock_exit;
3073         }
3074
3075         for (i = start_i; i < num_pages; i++) {
3076                 page = extent_buffer_page(eb, i);
3077                 if (inc_all_pages)
3078                         page_cache_get(page);
3079                 if (!PageUptodate(page)) {
3080                         if (start_i == 0)
3081                                 inc_all_pages = 1;
3082                         ClearPageError(page);
3083                         err = __extent_read_full_page(tree, page,
3084                                                       get_extent, &bio,
3085                                                       mirror_num);
3086                         if (err) {
3087                                 ret = err;
3088                         }
3089                 } else {
3090                         unlock_page(page);
3091                 }
3092         }
3093
3094         if (bio)
3095                 submit_one_bio(READ, bio, mirror_num);
3096
3097         if (ret || !wait) {
3098                 return ret;
3099         }
3100         for (i = start_i; i < num_pages; i++) {
3101                 page = extent_buffer_page(eb, i);
3102                 wait_on_page_locked(page);
3103                 if (!PageUptodate(page)) {
3104                         ret = -EIO;
3105                 }
3106         }
3107         if (!ret)
3108                 eb->flags |= EXTENT_UPTODATE;
3109         return ret;
3110
3111 unlock_exit:
3112         i = start_i;
3113         while (locked_pages > 0) {
3114                 page = extent_buffer_page(eb, i);
3115                 i++;
3116                 unlock_page(page);
3117                 locked_pages--;
3118         }
3119         return ret;
3120 }
3121 EXPORT_SYMBOL(read_extent_buffer_pages);
3122
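/*
 * read_extent_buffer - copy bytes out of an extent buffer
 *
 * Copies @len bytes starting at logical offset @start within the buffer into
 * @dstv, walking page by page.  For example, with 4K pages and a buffer that
 * begins 0xc00 bytes into its first page, a read at start == 0x600 lands in
 * page index (0xc00 + 0x600) >> 12 == 1 at in-page offset 0x200, and
 * continues from offset 0 on later pages.
 */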
3123 void read_extent_buffer(struct extent_buffer *eb, void *dstv,
3124                         unsigned long start,
3125                         unsigned long len)
3126 {
3127         size_t cur;
3128         size_t offset;
3129         struct page *page;
3130         char *kaddr;
3131         char *dst = (char *)dstv;
3132         size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3133         unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3134
3135         WARN_ON(start > eb->len);
3136         WARN_ON(start + len > eb->start + eb->len);
3137
3138         offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3139
3140         while (len > 0) {
3141                 page = extent_buffer_page(eb, i);
3142
3143                 cur = min(len, (PAGE_CACHE_SIZE - offset));
3144                 kaddr = kmap_atomic(page, KM_USER1);
3145                 memcpy(dst, kaddr + offset, cur);
3146                 kunmap_atomic(kaddr, KM_USER1);
3147
3148                 dst += cur;
3149                 len -= cur;
3150                 offset = 0;
3151                 i++;
3152         }
3153 }
3154 EXPORT_SYMBOL(read_extent_buffer);
3155
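/*
 * map_private_extent_buffer - kmap part of an extent buffer
 *
 * Maps the single page containing @start and returns, via @map and @map_len,
 * a kernel address covering at least @min_len bytes.  Because only one page
 * is mapped, the request must not cross a page boundary; if it does, -EINVAL
 * is returned and callers typically fall back to the copying helpers such as
 * read_extent_buffer().
 */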
3156 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
3157                                unsigned long min_len, char **token, char **map,
3158                                unsigned long *map_start,
3159                                unsigned long *map_len, int km)
3160 {
3161         size_t offset = start & (PAGE_CACHE_SIZE - 1);
3162         char *kaddr;
3163         struct page *p;
3164         size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3165         unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3166         unsigned long end_i = (start_offset + start + min_len - 1) >>
3167                 PAGE_CACHE_SHIFT;
3168
3169         if (i != end_i)
3170                 return -EINVAL;
3171
3172         if (i == 0) {
3173                 offset = start_offset;
3174                 *map_start = 0;
3175         } else {
3176                 offset = 0;
3177                 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
3178         }
3179         if (start + min_len > eb->len) {
3180                 printk(KERN_ERR "bad mapping eb start %Lu len %lu, wanted %lu %lu\n",
                       eb->start, eb->len, start, min_len);
3181                 WARN_ON(1);
3182         }
3183
3184         p = extent_buffer_page(eb, i);
3185         kaddr = kmap_atomic(p, km);
3186         *token = kaddr;
3187         *map = kaddr + offset;
3188         *map_len = PAGE_CACHE_SIZE - offset;
3189         return 0;
3190 }
3191 EXPORT_SYMBOL(map_private_extent_buffer);
3192
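/*
 * map_extent_buffer - map part of an extent buffer, recycling eb->map_token
 *
 * Like map_private_extent_buffer(), but if the buffer already has a cached
 * atomic mapping it is unmapped first and, on success, the new mapping is
 * saved back into the extent buffer so it can be reused later.
 */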
3193 int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
3194                       unsigned long min_len,
3195                       char **token, char **map,
3196                       unsigned long *map_start,
3197                       unsigned long *map_len, int km)
3198 {
3199         int err;
3200         int save = 0;
3201         if (eb->map_token) {
3202                 unmap_extent_buffer(eb, eb->map_token, km);
3203                 eb->map_token = NULL;
3204                 save = 1;
3205         }
3206         err = map_private_extent_buffer(eb, start, min_len, token, map,
3207                                        map_start, map_len, km);
3208         if (!err && save) {
3209                 eb->map_token = *token;
3210                 eb->kaddr = *map;
3211                 eb->map_start = *map_start;
3212                 eb->map_len = *map_len;
3213         }
3214         return err;
3215 }
3216 EXPORT_SYMBOL(map_extent_buffer);
3217
3218 void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
3219 {
3220         kunmap_atomic(token, km);
3221 }
3222 EXPORT_SYMBOL(unmap_extent_buffer);
3223
3224 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
3225                           unsigned long start,
3226                           unsigned long len)
3227 {
3228         size_t cur;
3229         size_t offset;
3230         struct page *page;
3231         char *kaddr;
3232         char *ptr = (char *)ptrv;
3233         size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3234         unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3235         int ret = 0;
3236
3237         WARN_ON(start > eb->len);
3238         WARN_ON(start + len > eb->start + eb->len);
3239
3240         offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3241
3242         while (len > 0) {
3243                 page = extent_buffer_page(eb, i);
3244
3245                 cur = min(len, (PAGE_CACHE_SIZE - offset));
3246
3247                 kaddr = kmap_atomic(page, KM_USER0);
3248                 ret = memcmp(ptr, kaddr + offset, cur);
3249                 kunmap_atomic(kaddr, KM_USER0);
3250                 if (ret)
3251                         break;
3252
3253                 ptr += cur;
3254                 len -= cur;
3255                 offset = 0;
3256                 i++;
3257         }
3258         return ret;
3259 }
3260 EXPORT_SYMBOL(memcmp_extent_buffer);
3261
3262 void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
3263                          unsigned long start, unsigned long len)
3264 {
3265         size_t cur;
3266         size_t offset;
3267         struct page *page;
3268         char *kaddr;
3269         char *src = (char *)srcv;
3270         size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3271         unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3272
3273         WARN_ON(start > eb->len);
3274         WARN_ON(start + len > eb->start + eb->len);
3275
3276         offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3277
3278         while (len > 0) {
3279                 page = extent_buffer_page(eb, i);
3280                 WARN_ON(!PageUptodate(page));
3281
3282                 cur = min(len, PAGE_CACHE_SIZE - offset);
3283                 kaddr = kmap_atomic(page, KM_USER1);
3284                 memcpy(kaddr + offset, src, cur);
3285                 kunmap_atomic(kaddr, KM_USER1);
3286
3287                 src += cur;
3288                 len -= cur;
3289                 offset = 0;
3290                 i++;
3291         }
3292 }
3293 EXPORT_SYMBOL(write_extent_buffer);
3294
3295 void memset_extent_buffer(struct extent_buffer *eb, char c,
3296                           unsigned long start, unsigned long len)
3297 {
3298         size_t cur;
3299         size_t offset;
3300         struct page *page;
3301         char *kaddr;
3302         size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3303         unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3304
3305         WARN_ON(start > eb->len);
3306         WARN_ON(start + len > eb->start + eb->len);
3307
3308         offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3309
3310         while (len > 0) {
3311                 page = extent_buffer_page(eb, i);
3312                 WARN_ON(!PageUptodate(page));
3313
3314                 cur = min(len, PAGE_CACHE_SIZE - offset);
3315                 kaddr = kmap_atomic(page, KM_USER0);
3316                 memset(kaddr + offset, c, cur);
3317                 kunmap_atomic(kaddr, KM_USER0);
3318
3319                 len -= cur;
3320                 offset = 0;
3321                 i++;
3322         }
3323 }
3324 EXPORT_SYMBOL(memset_extent_buffer);
3325
3326 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
3327                         unsigned long dst_offset, unsigned long src_offset,
3328                         unsigned long len)
3329 {
3330         u64 dst_len = dst->len;
3331         size_t cur;
3332         size_t offset;
3333         struct page *page;
3334         char *kaddr;
3335         size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3336         unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3337
3338         WARN_ON(src->len != dst_len);
3339
3340         offset = (start_offset + dst_offset) &
3341                 ((unsigned long)PAGE_CACHE_SIZE - 1);
3342
3343         while (len > 0) {
3344                 page = extent_buffer_page(dst, i);
3345                 WARN_ON(!PageUptodate(page));
3346
3347                 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
3348
3349                 kaddr = kmap_atomic(page, KM_USER0);
3350                 read_extent_buffer(src, kaddr + offset, src_offset, cur);
3351                 kunmap_atomic(kaddr, KM_USER0);
3352
3353                 src_offset += cur;
3354                 len -= cur;
3355                 offset = 0;
3356                 i++;
3357         }
3358 }
3359 EXPORT_SYMBOL(copy_extent_buffer);
3360
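/*
 * move_pages - memmove-style byte copy between pages of the same buffer
 *
 * When source and destination are the same page, memmove() handles any
 * overlap.  When they are different pages the bytes are copied one at a
 * time from the end towards the start.
 */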
3361 static void move_pages(struct page *dst_page, struct page *src_page,
3362                        unsigned long dst_off, unsigned long src_off,
3363                        unsigned long len)
3364 {
3365         char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3366         if (dst_page == src_page) {
3367                 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
3368         } else {
3369                 char *src_kaddr = kmap_atomic(src_page, KM_USER1);
3370                 char *p = dst_kaddr + dst_off + len;
3371                 char *s = src_kaddr + src_off + len;
3372
3373                 while (len--)
3374                         *--p = *--s;
3375
3376                 kunmap_atomic(src_kaddr, KM_USER1);
3377         }
3378         kunmap_atomic(dst_kaddr, KM_USER0);
3379 }
3380
3381 static void copy_pages(struct page *dst_page, struct page *src_page,
3382                        unsigned long dst_off, unsigned long src_off,
3383                        unsigned long len)
3384 {
3385         char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3386         char *src_kaddr;
3387
3388         if (dst_page != src_page)
3389                 src_kaddr = kmap_atomic(src_page, KM_USER1);
3390         else
3391                 src_kaddr = dst_kaddr;
3392
3393         memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
3394         kunmap_atomic(dst_kaddr, KM_USER0);
3395         if (dst_page != src_page)
3396                 kunmap_atomic(src_kaddr, KM_USER1);
3397 }
3398
3399 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3400                            unsigned long src_offset, unsigned long len)
3401 {
3402         size_t cur;
3403         size_t dst_off_in_page;
3404         size_t src_off_in_page;
3405         size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3406         unsigned long dst_i;
3407         unsigned long src_i;
3408
3409         if (src_offset + len > dst->len) {
3410                 printk(KERN_ERR "memcpy bogus src_offset %lu move len %lu len %lu\n",
3411                        src_offset, len, dst->len);
3412                 BUG();
3413         }
3414         if (dst_offset + len > dst->len) {
3415                 printk(KERN_ERR "memcpy bogus dst_offset %lu move len %lu len %lu\n",
3416                        dst_offset, len, dst->len);
3417                 BUG();
3418         }
3419
3420         while (len > 0) {
3421                 dst_off_in_page = (start_offset + dst_offset) &
3422                         ((unsigned long)PAGE_CACHE_SIZE - 1);
3423                 src_off_in_page = (start_offset + src_offset) &
3424                         ((unsigned long)PAGE_CACHE_SIZE - 1);
3425
3426                 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3427                 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
3428
3429                 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
3430                                                src_off_in_page));
3431                 cur = min_t(unsigned long, cur,
3432                         (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
3433
3434                 copy_pages(extent_buffer_page(dst, dst_i),
3435                            extent_buffer_page(dst, src_i),
3436                            dst_off_in_page, src_off_in_page, cur);
3437
3438                 src_offset += cur;
3439                 dst_offset += cur;
3440                 len -= cur;
3441         }
3442 }
3443 EXPORT_SYMBOL(memcpy_extent_buffer);
3444
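/*
 * memmove_extent_buffer - overlap-safe copy within one extent buffer
 *
 * When the destination starts below the source a forward copy is safe and
 * the work is delegated to memcpy_extent_buffer().  Otherwise the ranges may
 * overlap with the destination ahead of the source, so the copy runs
 * backwards from the last byte, one page-sized chunk at a time.
 */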
3445 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3446                            unsigned long src_offset, unsigned long len)
3447 {
3448         size_t cur;
3449         size_t dst_off_in_page;
3450         size_t src_off_in_page;
3451         unsigned long dst_end = dst_offset + len - 1;
3452         unsigned long src_end = src_offset + len - 1;
3453         size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3454         unsigned long dst_i;
3455         unsigned long src_i;
3456
3457         if (src_offset + len > dst->len) {
3458                 printk(KERN_ERR "memmove bogus src_offset %lu move len %lu len %lu\n",
3459                        src_offset, len, dst->len);
3460                 BUG();
3461         }
3462         if (dst_offset + len > dst->len) {
3463                 printk(KERN_ERR "memmove bogus dst_offset %lu move len %lu len %lu\n",
3464                        dst_offset, len, dst->len);
3465                 BUG();
3466         }
3467         if (dst_offset < src_offset) {
3468                 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
3469                 return;
3470         }
3471         while (len > 0) {
3472                 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
3473                 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
3474
3475                 dst_off_in_page = (start_offset + dst_end) &
3476                         ((unsigned long)PAGE_CACHE_SIZE - 1);
3477                 src_off_in_page = (start_offset + src_end) &
3478                         ((unsigned long)PAGE_CACHE_SIZE - 1);
3479
3480                 cur = min_t(unsigned long, len, src_off_in_page + 1);
3481                 cur = min(cur, dst_off_in_page + 1);
3482                 move_pages(extent_buffer_page(dst, dst_i),
3483                            extent_buffer_page(dst, src_i),
3484                            dst_off_in_page - cur + 1,
3485                            src_off_in_page - cur + 1, cur);
3486
3487                 dst_end -= cur;
3488                 src_end -= cur;
3489                 len -= cur;
3490         }
3491 }
3492 EXPORT_SYMBOL(memmove_extent_buffer);
3493
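/*
 * try_release_extent_buffer - drop the cached extent buffer for a page
 *
 * Called for the first page of a buffer; if nothing but the tree itself
 * holds a reference, the page references are dropped and the buffer is
 * removed from the buffer tree and freed.  Returns 1 if the buffer was
 * released (or was not cached at all) and 0 if it is still in use.
 */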
3494 int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
3495 {
3496         u64 start = page_offset(page);
3497         struct extent_buffer *eb;
3498         int ret = 1;
3499         unsigned long i;
3500         unsigned long num_pages;
3501
3502         spin_lock(&tree->buffer_lock);
3503         eb = buffer_search(tree, start);
3504         if (!eb)
3505                 goto out;
3506
3507         if (atomic_read(&eb->refs) > 1) {
3508                 ret = 0;
3509                 goto out;
3510         }
3511         /* at this point we can safely release the extent buffer */
3512         num_pages = num_extent_pages(eb->start, eb->len);
3513         for (i = 0; i < num_pages; i++) {
3514                 struct page *page = extent_buffer_page(eb, i);
3515                 page_cache_release(page);
3516         }
3517         rb_erase(&eb->rb_node, &tree->buffer);
3518         __free_extent_buffer(eb);
3519 out:
3520         spin_unlock(&tree->buffer_lock);
3521         return ret;
3522 }
3523 EXPORT_SYMBOL(try_release_extent_buffer);