/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/bio.h>
#include <linux/writeback.h>
#include <linux/list_sort.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "util.h"
#include "dir.h"
#include "trace_gfs2.h"

/**
 * gfs2_struct2blk - compute the number of log blocks needed for structures
 * @sdp: the filesystem
 * @nstruct: the number of structures
 * @ssize: the size of the structures
 *
 * Compute the number of log descriptor blocks needed to hold a certain number
 * of structures of a certain size.
 *
 * Returns: the number of blocks needed (minimum is always 1)
 */

unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
			     unsigned int ssize)
{
	unsigned int blks;
	unsigned int first, second;

	blks = 1;
	first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / ssize;

	if (nstruct > first) {
		second = (sdp->sd_sb.sb_bsize -
			  sizeof(struct gfs2_meta_header)) / ssize;
		blks += DIV_ROUND_UP(nstruct - first, second);
	}

	return blks;
}
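
/*
 * Worked example (illustrative figures, not taken from the on-disk
 * headers): with a hypothetical 4096-byte block, a 64-byte log
 * descriptor and an 8-byte structure (e.g. a u64 revoke entry), the
 * first block holds (4096 - 64) / 8 = 504 entries.  A continuation
 * block loses only a 24-byte meta header, so it holds
 * (4096 - 24) / 8 = 509 more.  Thus nstruct = 1500 would need
 * 1 + DIV_ROUND_UP(1500 - 504, 509) = 3 blocks.  The real capacities
 * depend on the actual sizes of struct gfs2_log_descriptor and
 * struct gfs2_meta_header.
 */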

/**
 * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
 * @bd: The gfs2_bufdata to remove
 *
 * The ail lock _must_ be held when calling this function
 *
 */

void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
{
	bd->bd_tr = NULL;
	list_del_init(&bd->bd_ail_st_list);
	list_del_init(&bd->bd_ail_gl_list);
	atomic_dec(&bd->bd_gl->gl_ail_count);
	brelse(bd->bd_bh);
}
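
/*
 * A minimal caller sketch (assumed, not from this file): the ail lock
 * must bracket the call, and bd->bd_bh must not be touched afterwards,
 * since gfs2_remove_from_ail() drops the reference it held on it:
 *
 *	spin_lock(&sdp->sd_ail_lock);
 *	gfs2_remove_from_ail(bd);
 *	spin_unlock(&sdp->sd_ail_lock);
 */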

/**
 * gfs2_ail1_start_one - Start I/O on a part of the AIL
 * @sdp: the filesystem
 * @wbc: The writeback control structure
 * @tr: the transaction whose ail1 list is scanned
 *
 * Returns: 1 if the caller should restart its scan of the ail1 lists,
 *          0 otherwise
 */

static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
			       struct writeback_control *wbc,
			       struct gfs2_trans *tr)
__releases(&sdp->sd_ail_lock)
__acquires(&sdp->sd_ail_lock)
{
	struct gfs2_glock *gl = NULL;
	struct address_space *mapping;
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;

	list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) {
		bh = bd->bd_bh;

		gfs2_assert(sdp, bd->bd_tr == tr);

		if (!buffer_busy(bh)) {
			if (!buffer_uptodate(bh))
				gfs2_io_error_bh(sdp, bh);
			list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
			continue;
		}

		if (!buffer_dirty(bh))
			continue;
		if (gl == bd->bd_gl)
			continue;
		gl = bd->bd_gl;
		list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list);
		mapping = bh->b_page->mapping;
		if (!mapping)
			continue;
		spin_unlock(&sdp->sd_ail_lock);
		generic_writepages(mapping, wbc);
		spin_lock(&sdp->sd_ail_lock);
		if (wbc->nr_to_write <= 0)
			break;
		return 1;
	}

	return 0;
}

/**
 * gfs2_ail1_flush - start writeback of some ail1 entries
 * @sdp: The superblock
 * @wbc: The writeback control structure
 *
 * Writes back some ail1 entries, according to the limits in the
 * writeback control structure
 */

void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
{
	struct list_head *head = &sdp->sd_ail1_list;
	struct gfs2_trans *tr;

	trace_gfs2_ail_flush(sdp, wbc, 1);
	spin_lock(&sdp->sd_ail_lock);
restart:
	list_for_each_entry_reverse(tr, head, tr_list) {
		if (wbc->nr_to_write <= 0)
			break;
		if (gfs2_ail1_start_one(sdp, wbc, tr))
			goto restart;
	}
	spin_unlock(&sdp->sd_ail_lock);
	trace_gfs2_ail_flush(sdp, wbc, 0);
}

/**
 * gfs2_ail1_start - start writeback of all ail1 entries
 * @sdp: The superblock
 */

static void gfs2_ail1_start(struct gfs2_sbd *sdp)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
		.nr_to_write = LONG_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	return gfs2_ail1_flush(sdp, &wbc);
}

/**
 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
 * @sdp: the filesystem
 * @tr: the transaction
 *
 */

static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;

	list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list,
					 bd_ail_st_list) {
		bh = bd->bd_bh;
		gfs2_assert(sdp, bd->bd_tr == tr);
		if (buffer_busy(bh))
			continue;
		if (!buffer_uptodate(bh))
			gfs2_io_error_bh(sdp, bh);
		list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
	}
}

/**
 * gfs2_ail1_empty - Try to empty the ail1 lists
 * @sdp: The superblock
 *
 * Tries to empty the ail1 lists, starting with the oldest first
 */

static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr, *s;
	int oldest_tr = 1;
	int ret;

	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
		gfs2_ail1_empty_one(sdp, tr);
		if (list_empty(&tr->tr_ail1_list) && oldest_tr)
			list_move(&tr->tr_list, &sdp->sd_ail2_list);
		else
			oldest_tr = 0;
	}
	ret = list_empty(&sdp->sd_ail1_list);
	spin_unlock(&sdp->sd_ail_lock);

	return ret;
}

static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr;
	struct gfs2_bufdata *bd;
	struct buffer_head *bh;

	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
		list_for_each_entry(bd, &tr->tr_ail1_list, bd_ail_st_list) {
			bh = bd->bd_bh;
			if (!buffer_locked(bh))
				continue;
			get_bh(bh);
			spin_unlock(&sdp->sd_ail_lock);
			wait_on_buffer(bh);
			brelse(bh);
			return;
		}
	}
	spin_unlock(&sdp->sd_ail_lock);
}

/**
 * gfs2_ail2_empty_one - Remove all the buffers of a transaction from the ail2 list
 * @sdp: the filesystem
 * @tr: the transaction
 *
 */

static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head = &tr->tr_ail2_list;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->prev, struct gfs2_bufdata,
				bd_ail_st_list);
		gfs2_assert(sdp, bd->bd_tr == tr);
		gfs2_remove_from_ail(bd);
	}
}

static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
{
	struct gfs2_trans *tr, *safe;
	unsigned int old_tail = sdp->sd_log_tail;
	int wrap = (new_tail < old_tail);
	int a, b, rm;

	spin_lock(&sdp->sd_ail_lock);

	list_for_each_entry_safe(tr, safe, &sdp->sd_ail2_list, tr_list) {
		a = (old_tail <= tr->tr_first);
		b = (tr->tr_first < new_tail);
		rm = (wrap) ? (a || b) : (a && b);
		if (!rm)
			continue;

		gfs2_ail2_empty_one(sdp, tr);
		list_del(&tr->tr_list);
		gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
		gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
		kfree(tr);
	}

	spin_unlock(&sdp->sd_ail_lock);
}
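
/*
 * Illustration of the wrap test above (hypothetical numbers): a
 * transaction is freed if its first block lies in the half-open
 * interval [old_tail, new_tail) of journal blocks being released.
 * Without wrap, e.g. old_tail = 10 and new_tail = 50, that is simply
 * 10 <= tr_first < 50 (a && b).  With wrap, e.g. old_tail = 900 and
 * new_tail = 20 in a 1000-block journal, the interval is split in
 * two, so tr_first >= 900 or tr_first < 20 qualifies (a || b).
 */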

/**
 * gfs2_log_reserve - Make a log reservation
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks to reserve
 *
 * Note that we never give out the last few blocks of the journal. That's
 * due to the fact that there is a small number of header blocks
 * associated with each log flush. The exact number can't be known until
 * flush time, so we ensure that we have just enough free blocks at all
 * times to avoid running out during a log flush.
 *
 * We no longer flush the log here, instead we wake up logd to do that
 * for us. To avoid the thundering herd and to ensure that we deal fairly
 * with queued waiters, we use an exclusive wait. This means that when we
 * get woken with enough journal space to get our reservation, we need to
 * wake the next waiter on the list.
 *
 * Returns: errno
 */

int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
{
	unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize);
	unsigned wanted = blks + reserved_blks;
	DEFINE_WAIT(wait);
	int did_wait = 0;
	unsigned int free_blocks;

	if (gfs2_assert_warn(sdp, blks) ||
	    gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
		return -EINVAL;
retry:
	free_blocks = atomic_read(&sdp->sd_log_blks_free);
	if (unlikely(free_blocks <= wanted)) {
		do {
			prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait,
					TASK_UNINTERRUPTIBLE);
			wake_up(&sdp->sd_logd_waitq);
			did_wait = 1;
			if (atomic_read(&sdp->sd_log_blks_free) <= wanted)
				io_schedule();
			free_blocks = atomic_read(&sdp->sd_log_blks_free);
		} while(free_blocks <= wanted);
		finish_wait(&sdp->sd_log_waitq, &wait);
	}
	if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
				free_blocks - blks) != free_blocks)
		goto retry;
	trace_gfs2_log_blocks(sdp, -blks);

	/*
	 * If we waited, then so might others, wake them up _after_ we get
	 * our share of the log.
	 */
	if (unlikely(did_wait))
		wake_up(&sdp->sd_log_waitq);

	down_read(&sdp->sd_log_flush_lock);

	return 0;
}
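
/*
 * Sizing note (arithmetic from the code above, example figures assumed):
 * reserved_blks scales the held-back headroom with the block size, so a
 * filesystem with 4096-byte blocks keeps 7 * (4096 / 4096) = 7 journal
 * blocks in reserve, while one with 1024-byte blocks keeps
 * 7 * (4096 / 1024) = 28, covering the header blocks a flush may add.
 */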

/**
 * log_distance - Compute distance between two journal blocks
 * @sdp: The GFS2 superblock
 * @newer: The most recent journal block of the pair
 * @older: The older journal block of the pair
 *
 * Compute the distance (in the journal direction) between two
 * blocks in the journal
 *
 * Returns: the distance in blocks
 */

static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer,
					unsigned int older)
{
	int dist;

	dist = newer - older;
	if (dist < 0)
		dist += sdp->sd_jdesc->jd_blocks;

	return dist;
}
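
/*
 * For example (hypothetical journal of jd_blocks = 1000):
 * log_distance(sdp, 750, 200) = 550, and once the head wraps past the
 * end of the journal, log_distance(sdp, 50, 900) = 50 - 900 + 1000 = 150.
 */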

/**
 * calc_reserved - Calculate the number of blocks to reserve when
 *                 refunding a transaction's unused buffers.
 * @sdp: The GFS2 superblock
 *
 * This is complex.  We need to reserve room for all our currently used
 * metadata buffers (e.g. normal file I/O rewriting file time stamps) and
 * all our journaled data buffers for journaled files (e.g. files in the
 * meta_fs like rindex, or files for which chattr +j was done.)
 * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
 * will count it as free space (sd_log_blks_free) and corruption will follow.
 *
 * We can have metadata bufs and jdata bufs in the same journal.  So each
 * type gets its own log header, for which we need to reserve a block.
 * In fact, each type has the potential for needing more than one header
 * in cases where we have more buffers than will fit on a journal page.
 * Metadata journal entries take up half the space of journaled buffer entries.
 * Thus, metadata entries have buf_limit (502) and journaled buffers have
 * databuf_limit (251) before they cause a wrap around.
 *
 * Also, we need to reserve blocks for revoke journal entries and one for an
 * overall header for the lot.
 *
 * Returns: the number of blocks reserved
 */
static unsigned int calc_reserved(struct gfs2_sbd *sdp)
{
	unsigned int reserved = 0;
	unsigned int mbuf_limit, metabufhdrs_needed;
	unsigned int dbuf_limit, databufhdrs_needed;
	unsigned int revokes = 0;

	mbuf_limit = buf_limit(sdp);
	metabufhdrs_needed = (sdp->sd_log_commited_buf +
			      (mbuf_limit - 1)) / mbuf_limit;
	dbuf_limit = databuf_limit(sdp);
	databufhdrs_needed = (sdp->sd_log_commited_databuf +
			      (dbuf_limit - 1)) / dbuf_limit;

	if (sdp->sd_log_commited_revoke > 0)
		revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
					  sizeof(u64));

	reserved = sdp->sd_log_commited_buf + metabufhdrs_needed +
		sdp->sd_log_commited_databuf + databufhdrs_needed +
		revokes;
	/* One for the overall header */
	if (reserved)
		reserved++;
	return reserved;
}
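
/*
 * Worked example (hypothetical counts, with the limits quoted in the
 * comment above): with 600 committed metadata buffers, 100 journaled
 * data buffers and no revokes, calc_reserved() needs
 * 600 + DIV_ROUND_UP(600, 502) + 100 + DIV_ROUND_UP(100, 251) + 1
 * = 600 + 2 + 100 + 1 + 1 = 704 journal blocks.
 */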

static unsigned int current_tail(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr;
	unsigned int tail;

	spin_lock(&sdp->sd_ail_lock);

	if (list_empty(&sdp->sd_ail1_list)) {
		tail = sdp->sd_log_head;
	} else {
		tr = list_entry(sdp->sd_ail1_list.prev, struct gfs2_trans,
				tr_list);
		tail = tr->tr_first;
	}

	spin_unlock(&sdp->sd_ail_lock);

	return tail;
}

static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
{
	unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);

	ail2_empty(sdp, new_tail);

	atomic_add(dist, &sdp->sd_log_blks_free);
	trace_gfs2_log_blocks(sdp, dist);
	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
			     sdp->sd_jdesc->jd_blocks);

	sdp->sd_log_tail = new_tail;
}

static void log_flush_wait(struct gfs2_sbd *sdp)
{
	DEFINE_WAIT(wait);

	if (atomic_read(&sdp->sd_log_in_flight)) {
		do {
			prepare_to_wait(&sdp->sd_log_flush_wait, &wait,
					TASK_UNINTERRUPTIBLE);
			if (atomic_read(&sdp->sd_log_in_flight))
				io_schedule();
		} while(atomic_read(&sdp->sd_log_in_flight));
		finish_wait(&sdp->sd_log_flush_wait, &wait);
	}
}

static int ip_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct gfs2_inode *ipa, *ipb;

	ipa = list_entry(a, struct gfs2_inode, i_ordered);
	ipb = list_entry(b, struct gfs2_inode, i_ordered);

	if (ipa->i_no_addr < ipb->i_no_addr)
		return -1;
	if (ipa->i_no_addr > ipb->i_no_addr)
		return 1;
	return 0;
}
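
/*
 * Design note (our reading of the code, not an original comment):
 * sorting the ordered-write list by i_no_addr, the inode's disk
 * address, means gfs2_ordered_write() below pushes inodes out in
 * roughly ascending disk order, which keeps the writeback I/O mostly
 * sequential rather than seeking between inodes at random.
 */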

static void gfs2_ordered_write(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip;
	LIST_HEAD(written);

	spin_lock(&sdp->sd_ordered_lock);
	list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
	while (!list_empty(&sdp->sd_log_le_ordered)) {
		ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
		list_move(&ip->i_ordered, &written);
		if (ip->i_inode.i_mapping->nrpages == 0)
			continue;
		spin_unlock(&sdp->sd_ordered_lock);
		filemap_fdatawrite(ip->i_inode.i_mapping);
		spin_lock(&sdp->sd_ordered_lock);
	}
	list_splice(&written, &sdp->sd_log_le_ordered);
	spin_unlock(&sdp->sd_ordered_lock);
}

static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip;

	spin_lock(&sdp->sd_ordered_lock);
	while (!list_empty(&sdp->sd_log_le_ordered)) {
		ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered);
		list_del(&ip->i_ordered);
		WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
		if (ip->i_inode.i_mapping->nrpages == 0)
			continue;
		spin_unlock(&sdp->sd_ordered_lock);
		filemap_fdatawait(ip->i_inode.i_mapping);
		spin_lock(&sdp->sd_ordered_lock);
	}
	spin_unlock(&sdp->sd_ordered_lock);
}

void gfs2_ordered_del_inode(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);

	spin_lock(&sdp->sd_ordered_lock);
	if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags))
		list_del(&ip->i_ordered);
	spin_unlock(&sdp->sd_ordered_lock);
}

void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
	struct buffer_head *bh = bd->bd_bh;
	struct gfs2_glock *gl = bd->bd_gl;

	bh->b_private = NULL;
	bd->bd_blkno = bh->b_blocknr;
	gfs2_remove_from_ail(bd); /* drops ref on bh */
	bd->bd_bh = NULL;
	bd->bd_ops = &gfs2_revoke_lops;
	sdp->sd_log_num_revoke++;
	atomic_inc(&gl->gl_revokes);
	set_bit(GLF_LFLUSH, &gl->gl_flags);
	list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
}

void gfs2_write_revokes(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr;
	struct gfs2_bufdata *bd, *tmp;
	int have_revokes = 0;
	int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);

	gfs2_ail1_empty(sdp);
	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) {
		list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) {
			if (list_empty(&bd->bd_list)) {
				have_revokes = 1;
				goto done;
			}
		}
	}
done:
	spin_unlock(&sdp->sd_ail_lock);
	if (have_revokes == 0)
		return;
	while (sdp->sd_log_num_revoke > max_revokes)
		max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
	max_revokes -= sdp->sd_log_num_revoke;
	if (!sdp->sd_log_num_revoke) {
		atomic_dec(&sdp->sd_log_blks_free);
		/* If no blocks have been reserved, we need to also
		 * reserve a block for the header */
		if (!sdp->sd_log_blks_reserved)
			atomic_dec(&sdp->sd_log_blks_free);
	}
	gfs2_log_lock(sdp);
	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) {
		list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) {
			if (max_revokes == 0)
				goto out_of_blocks;
			if (!list_empty(&bd->bd_list))
				continue;
			gfs2_add_revoke(sdp, bd);
			max_revokes--;
		}
	}
out_of_blocks:
	spin_unlock(&sdp->sd_ail_lock);
	gfs2_log_unlock(sdp);

	if (!sdp->sd_log_num_revoke) {
		atomic_inc(&sdp->sd_log_blks_free);
		if (!sdp->sd_log_blks_reserved)
			atomic_inc(&sdp->sd_log_blks_free);
	}
}
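
/*
 * Capacity note (same first/continuation arithmetic as
 * gfs2_struct2blk() above): max_revokes starts at the number of u64
 * revoke entries that fit in one descriptor block, and each extra
 * journal block already consumed by pending revokes adds a
 * continuation block's worth of entries, losing only a meta header.
 * What remains after subtracting sd_log_num_revoke is how many more
 * revokes can be written without growing the reservation.
 */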

/**
 * log_write_header - Get and initialize a journal header buffer
 * @sdp: The GFS2 superblock
 * @flags: The log header flags, e.g. GFS2_LOG_HEAD_UNMOUNT
 *
 * Writes the header to the current flush head and, if needed, pulls
 * the log tail forward.
 */

static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
{
	struct gfs2_log_header *lh;
	unsigned int tail;
	u32 hash;
	int rw = WRITE_FLUSH_FUA | REQ_META;
	struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
	lh = page_address(page);
	clear_page(lh);

	tail = current_tail(sdp);

	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
	lh->lh_header.__pad0 = cpu_to_be64(0);
	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
	lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
	lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
	lh->lh_flags = cpu_to_be32(flags);
	lh->lh_tail = cpu_to_be32(tail);
	lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
	hash = gfs2_disk_hash(page_address(page), sizeof(struct gfs2_log_header));
	lh->lh_hash = cpu_to_be32(hash);

	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
		gfs2_ordered_wait(sdp);
		log_flush_wait(sdp);
		rw = WRITE_SYNC | REQ_META | REQ_PRIO;
	}

	sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
	gfs2_log_write_page(sdp, page);
	gfs2_log_flush_bio(sdp, rw);
	log_flush_wait(sdp);

	if (sdp->sd_log_tail != tail)
		log_pull_tail(sdp, tail);
}
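
/*
 * Ordering note (our reading, not an original comment): with barriers
 * enabled, WRITE_FLUSH_FUA makes the device flush preceding log data
 * before the header and commit the header itself to stable storage.
 * With SDF_NOBARRIERS set, that ordering is approximated in software
 * instead: wait for ordered data and in-flight log I/O first, then
 * issue the header as a plain synchronous write.
 */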

/**
 * gfs2_log_flush - flush incore transaction(s)
 * @sdp: the filesystem
 * @gl: The glock structure to flush.  If NULL, flush the whole incore log
 *
 */

void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
{
	struct gfs2_trans *tr;

	down_write(&sdp->sd_log_flush_lock);

	/* Log might have been flushed while we waited for the flush lock */
	if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) {
		up_write(&sdp->sd_log_flush_lock);
		return;
	}
	trace_gfs2_log_flush(sdp, 1);

	tr = sdp->sd_log_tr;
	if (tr) {
		sdp->sd_log_tr = NULL;
		INIT_LIST_HEAD(&tr->tr_ail1_list);
		INIT_LIST_HEAD(&tr->tr_ail2_list);
	}

	if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) {
		printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf,
		       sdp->sd_log_commited_buf);
		gfs2_assert_withdraw(sdp, 0);
	}
	if (sdp->sd_log_num_databuf != sdp->sd_log_commited_databuf) {
		printk(KERN_INFO "GFS2: log databuf %u %u\n",
		       sdp->sd_log_num_databuf, sdp->sd_log_commited_databuf);
		gfs2_assert_withdraw(sdp, 0);
	}
	gfs2_assert_withdraw(sdp,
			sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);

	sdp->sd_log_flush_head = sdp->sd_log_head;
	sdp->sd_log_flush_wrapped = 0;
	if (tr)
		tr->tr_first = sdp->sd_log_flush_head;

	gfs2_ordered_write(sdp);
	lops_before_commit(sdp);
	gfs2_log_flush_bio(sdp, WRITE);

	if (sdp->sd_log_head != sdp->sd_log_flush_head) {
		log_write_header(sdp, 0);
	} else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle) {
		atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
		trace_gfs2_log_blocks(sdp, -1);
		log_write_header(sdp, 0);
	}
	lops_after_commit(sdp, tr);

	gfs2_log_lock(sdp);
	sdp->sd_log_head = sdp->sd_log_flush_head;
	sdp->sd_log_blks_reserved = 0;
	sdp->sd_log_commited_buf = 0;
	sdp->sd_log_commited_databuf = 0;
	sdp->sd_log_commited_revoke = 0;

	spin_lock(&sdp->sd_ail_lock);
	if (tr && !list_empty(&tr->tr_ail1_list)) {
		list_add(&tr->tr_list, &sdp->sd_ail1_list);
		tr = NULL;
	}
	spin_unlock(&sdp->sd_ail_lock);
	gfs2_log_unlock(sdp);
	trace_gfs2_log_flush(sdp, 0);
	up_write(&sdp->sd_log_flush_lock);

	kfree(tr);
}

static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	unsigned int reserved;
	unsigned int unused;

	gfs2_log_lock(sdp);

	sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
	sdp->sd_log_commited_databuf += tr->tr_num_databuf_new -
		tr->tr_num_databuf_rm;
	gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) ||
			     (((int)sdp->sd_log_commited_databuf) >= 0));
	sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
	reserved = calc_reserved(sdp);
	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved);
	unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved;
	atomic_add(unused, &sdp->sd_log_blks_free);
	trace_gfs2_log_blocks(sdp, unused);
	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
			     sdp->sd_jdesc->jd_blocks);
	sdp->sd_log_blks_reserved = reserved;

	if (sdp->sd_log_tr == NULL &&
	    (tr->tr_num_buf_new || tr->tr_num_databuf_new)) {
		gfs2_assert_withdraw(sdp, tr->tr_t_gh.gh_gl);
		sdp->sd_log_tr = tr;
		tr->tr_attached = 1;
	}
	gfs2_log_unlock(sdp);
}

/**
 * gfs2_log_commit - Commit a transaction to the log
 * @sdp: the filesystem
 * @tr: the transaction
 *
 * We wake up gfs2_logd if the number of pinned blocks exceeds thresh1
 * or the total number of used blocks (pinned blocks plus AIL blocks)
 * is greater than thresh2.
 *
 * At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of
 * journal size.
 */

void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	log_refund(sdp, tr);

	if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
	    ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
	    atomic_read(&sdp->sd_log_thresh2)))
		wake_up(&sdp->sd_logd_waitq);
}
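
/*
 * Threshold example (hypothetical 3000-block journal, thresholds as
 * documented above): thresh1 = 1000 and thresh2 = 2000, so logd is
 * woken once more than 1000 blocks are pinned in the incore log, or
 * once jd_blocks - sd_log_blks_free, the pinned plus AIL total,
 * exceeds 2000 blocks.
 */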

/**
 * gfs2_log_shutdown - write a shutdown header into a journal
 * @sdp: the filesystem
 *
 */

void gfs2_log_shutdown(struct gfs2_sbd *sdp)
{
	down_write(&sdp->sd_log_flush_lock);

	gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));

	sdp->sd_log_flush_head = sdp->sd_log_head;
	sdp->sd_log_flush_wrapped = 0;

	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT);

	gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
	gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
	gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));

	sdp->sd_log_head = sdp->sd_log_flush_head;
	sdp->sd_log_tail = sdp->sd_log_head;

	up_write(&sdp->sd_log_flush_lock);
}

/**
 * gfs2_meta_syncfs - sync all the buffers in a filesystem
 * @sdp: the filesystem
 *
 */

void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
{
	gfs2_log_flush(sdp, NULL);
	for (;;) {
		gfs2_ail1_start(sdp);
		gfs2_ail1_wait(sdp);
		if (gfs2_ail1_empty(sdp))
			break;
	}
	gfs2_log_flush(sdp, NULL);
}

static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
{
	return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1));
}

static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
{
	unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
	return used_blocks >= atomic_read(&sdp->sd_log_thresh2);
}

/**
 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
 * @sdp: Pointer to GFS2 superblock
 *
 * Also, periodically check to make sure that we're using the most recent
 * journal index.
 */

int gfs2_logd(void *data)
{
	struct gfs2_sbd *sdp = data;
	unsigned long t = 1;
	DEFINE_WAIT(wait);

	while (!kthread_should_stop()) {

		if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
			gfs2_ail1_empty(sdp);
			gfs2_log_flush(sdp, NULL);
		}

		if (gfs2_ail_flush_reqd(sdp)) {
			gfs2_ail1_start(sdp);
			gfs2_ail1_wait(sdp);
			gfs2_ail1_empty(sdp);
			gfs2_log_flush(sdp, NULL);
		}

		if (!gfs2_ail_flush_reqd(sdp))
			wake_up(&sdp->sd_log_waitq);

		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;

		try_to_freeze();

		do {
			prepare_to_wait(&sdp->sd_logd_waitq, &wait,
					TASK_INTERRUPTIBLE);
			if (!gfs2_ail_flush_reqd(sdp) &&
			    !gfs2_jrnl_flush_reqd(sdp) &&
			    !kthread_should_stop())
				t = schedule_timeout(t);
		} while(t && !gfs2_ail_flush_reqd(sdp) &&
			!gfs2_jrnl_flush_reqd(sdp) &&
			!kthread_should_stop());
		finish_wait(&sdp->sd_logd_waitq, &wait);
	}

	return 0;
}