jbd2: cleanup needed free block estimates when starting a transaction

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 10f524c59ea88d48bf4f85f42e6fb2eca0d7a55b..f9cd43190b43ca498b05c7ea2036024ac2610803 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -283,22 +283,12 @@ repeat:
         * reduce the free space arbitrarily.  Be careful to account for
         * those buffers when checkpointing.
         */
-
-       /*
-        * @@@ AKPM: This seems rather over-defensive.  We're giving commit
-        * a _lot_ of headroom: 1/4 of the journal plus the size of
-        * the committing transaction.  Really, we only need to give it
-        * committing_transaction->t_outstanding_credits plus "enough" for
-        * the log control blocks.
-        * Also, this test is inconsistent with the matching one in
-        * jbd2_journal_extend().
-        */
-       if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
+       if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
                jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
                atomic_sub(nblocks, &transaction->t_outstanding_credits);
                read_unlock(&journal->j_state_lock);
                write_lock(&journal->j_state_lock);
-               if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
+               if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
                        __jbd2_log_wait_for_space(journal);
                write_unlock(&journal->j_state_lock);
                goto repeat;
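
For context on the new check: a handle may only start once the log has room for
both the transaction being built and the one currently committing. Below is a
minimal user-space sketch of that decision with made-up numbers; demo_journal,
demo_log_space_left() and demo_space_needed() are only stand-ins for journal_t
and the real jbd2_log_space_left()/jbd2_space_needed() helpers, which roughly
account for a full transaction's worth of buffers plus the committing
transaction's outstanding credits.

	#include <stdio.h>

	/* Stand-in for journal_t; fields and numbers are illustrative only. */
	struct demo_journal {
		long free_blocks;		/* free space left in the on-disk log */
		long committing_credits;	/* credits held by the committing transaction */
		long max_transaction;		/* worst-case size of one transaction */
	};

	/* Rough model of jbd2_log_space_left(). */
	static long demo_log_space_left(const struct demo_journal *j)
	{
		return j->free_blocks;
	}

	/* Rough model of jbd2_space_needed(). */
	static long demo_space_needed(const struct demo_journal *j)
	{
		return j->max_transaction + j->committing_credits;
	}

	int main(void)
	{
		struct demo_journal j = {
			.free_blocks = 900,
			.committing_credits = 512,
			.max_transaction = 640,
		};

		if (demo_log_space_left(&j) < demo_space_needed(&j))
			printf("handle must wait for checkpoint (free %ld < needed %ld)\n",
			       demo_log_space_left(&j), demo_space_needed(&j));
		else
			printf("handle can start\n");
		return 0;
	}

In the kernel, the "wait" branch gives back the handle's credits, takes
j_state_lock for writing and calls __jbd2_log_wait_for_space() before retrying,
exactly as the hunk above shows.
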
@@ -316,7 +306,7 @@ repeat:
        jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
                  handle, nblocks,
                  atomic_read(&transaction->t_outstanding_credits),
-                 __jbd2_log_space_left(journal));
+                 jbd2_log_space_left(journal));
        read_unlock(&journal->j_state_lock);
 
        lock_map_acquire(&handle->h_lockdep_map);
@@ -451,7 +441,8 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
                goto unlock;
        }
 
-       if (wanted > __jbd2_log_space_left(journal)) {
+       if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) >
+           jbd2_log_space_left(journal)) {
                jbd_debug(3, "denied handle %p %d blocks: "
                          "insufficient log space\n", handle, nblocks);
                goto unlock;
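
The extend path now applies the same kind of estimate: on top of the blocks the
handle wants, it reserves a fraction of them for journal control
(descriptor/commit) blocks via the JBD2_CONTROL_BLOCKS_SHIFT right shift. A
small sketch of that arithmetic, with an assumed shift value and made-up
numbers (the real constant is defined in include/linux/jbd2.h):

	#include <stdio.h>

	/* Assumed value, for the demonstration only. */
	#define DEMO_CONTROL_BLOCKS_SHIFT 5	/* overhead estimated at wanted / 32 */

	int main(void)
	{
		unsigned long wanted = 200;		/* blocks the handle asks for */
		unsigned long overhead = wanted >> DEMO_CONTROL_BLOCKS_SHIFT;
		unsigned long log_space_left = 205;	/* free blocks in the log */

		if (wanted + overhead > log_space_left)
			printf("extend denied: need %lu + %lu, only %lu left\n",
			       wanted, overhead, log_space_left);
		else
			printf("extend allowed\n");
		return 0;
	}
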
@@ -619,6 +610,12 @@ static void warn_dirty_buffer(struct buffer_head *bh)
               bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
 }
 
+static int sleep_on_shadow_bh(void *word)
+{
+       io_schedule();
+       return 0;
+}
+
 /*
  * If the buffer is already part of the current transaction, then there
  * is nothing we need to do.  If it is already part of a prior
@@ -754,41 +751,29 @@ repeat:
                 * journaled.  If the primary copy is already going to
                 * disk then we cannot do copy-out here. */
 
-               if (jh->b_jlist == BJ_Shadow) {
-                       DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow);
-                       wait_queue_head_t *wqh;
-
-                       wqh = bit_waitqueue(&bh->b_state, BH_Unshadow);
-
+               if (buffer_shadow(bh)) {
                        JBUFFER_TRACE(jh, "on shadow: sleep");
                        jbd_unlock_bh_state(bh);
-                       /* commit wakes up all shadow buffers after IO */
-                       for ( ; ; ) {
-                               prepare_to_wait(wqh, &wait.wait,
-                                               TASK_UNINTERRUPTIBLE);
-                               if (jh->b_jlist != BJ_Shadow)
-                                       break;
-                               schedule();
-                       }
-                       finish_wait(wqh, &wait.wait);
+                       wait_on_bit(&bh->b_state, BH_Shadow,
+                                   sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE);
                        goto repeat;
                }
 
-               /* Only do the copy if the currently-owning transaction
-                * still needs it.  If it is on the Forget list, the
-                * committing transaction is past that stage.  The
-                * buffer had better remain locked during the kmalloc,
-                * but that should be true --- we hold the journal lock
-                * still and the buffer is already on the BUF_JOURNAL
-                * list so won't be flushed.
+               /*
+                * Only do the copy if the currently-owning transaction still
+                * needs it. If the buffer isn't on the BJ_Metadata list, the
+                * committing transaction is past that stage (here we use the
+                * fact that BH_Shadow is set under bh_state lock together with
+                * refiling to BJ_Shadow list and at this point we know the
+                * buffer doesn't have BH_Shadow set).
                 *
                 * Subtle point, though: if this is a get_undo_access,
                 * then we will be relying on the frozen_data to contain
                 * the new value of the committed_data record after the
                 * transaction, so we HAVE to force the frozen_data copy
-                * in that case. */
-
-               if (jh->b_jlist != BJ_Forget || force_copy) {
+                * in that case.
+                */
+               if (jh->b_jlist == BJ_Metadata || force_copy) {
                        JBUFFER_TRACE(jh, "generate frozen data");
                        if (!frozen_buffer) {
                                JBUFFER_TRACE(jh, "allocate memory for buffer");
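
Two things change in this hunk. First, the open-coded prepare_to_wait()/
schedule() loop on BH_Unshadow is replaced by a plain wait_on_bit() on
BH_Shadow, with sleep_on_shadow_bh() from the earlier hunk supplying the
io_schedule() sleep; the commit path clears the bit and wakes the waiters once
the shadow copy has been written. Second, the copy-out test is turned into a
positive check: a frozen copy is made only while the committing transaction
still has the buffer on its BJ_Metadata list, or when get_undo_access forces
it. The pthread sketch below only models the wait/wake handshake; the mutex,
condition variable and one-second delay are stand-ins for the kernel's bit
waitqueue, not how jbd2 actually implements it.

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <unistd.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
	static bool shadow = true;	/* models BH_Shadow being set on the buffer */

	/* Models the commit path: write the shadow copy, clear the flag, wake waiters. */
	static void *commit_thread(void *unused)
	{
		sleep(1);			/* pretend the journal I/O takes a while */
		pthread_mutex_lock(&lock);
		shadow = false;
		pthread_cond_broadcast(&cond);
		pthread_mutex_unlock(&lock);
		return NULL;
	}

	int main(void)
	{
		pthread_t commit;

		pthread_create(&commit, NULL, commit_thread, NULL);

		/* Models do_get_write_access(): sleep until the shadow flag clears. */
		pthread_mutex_lock(&lock);
		while (shadow)
			pthread_cond_wait(&cond, &lock);
		pthread_mutex_unlock(&lock);

		printf("shadow cleared, retry taking write access\n");
		pthread_join(commit, NULL);
		return 0;
	}

(Build with -pthread.)
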
@@ -1601,10 +1586,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
  * Remove a buffer from the appropriate transaction list.
  *
  * Note that this function can *change* the value of
- * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list,
- * t_log_list or t_reserved_list.  If the caller is holding onto a copy of one
- * of these pointers, it could go bad.  Generally the caller needs to re-read
- * the pointer from the transaction_t.
+ * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or
+ * t_reserved_list.  If the caller is holding onto a copy of one of these
+ * pointers, it could go bad.  Generally the caller needs to re-read the
+ * pointer from the transaction_t.
  *
  * Called under j_list_lock.
  */
@@ -1634,15 +1619,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
        case BJ_Forget:
                list = &transaction->t_forget;
                break;
-       case BJ_IO:
-               list = &transaction->t_iobuf_list;
-               break;
        case BJ_Shadow:
                list = &transaction->t_shadow_list;
                break;
-       case BJ_LogCtl:
-               list = &transaction->t_log_list;
-               break;
        case BJ_Reserved:
                list = &transaction->t_reserved_list;
                break;
@@ -2034,18 +2013,23 @@ zap_buffer_unlocked:
  * void jbd2_journal_invalidatepage()
  * @journal: journal to use for flush...
  * @page:    page to flush
- * @offset:  length of page to invalidate.
+ * @offset:  start of the range to invalidate
+ * @length:  length of the range to invalidate
  *
- * Reap page buffers containing data after offset in page. Can return -EBUSY
- * if buffers are part of the committing transaction and the page is straddling
- * i_size. Caller then has to wait for current commit and try again.
+ * Reap page buffers containing data in the specified range of the page.
+ * Can return -EBUSY if buffers are part of the committing transaction and
+ * the page is straddling i_size. Caller then has to wait for current commit
+ * and try again.
  */
 int jbd2_journal_invalidatepage(journal_t *journal,
                                struct page *page,
-                               unsigned long offset)
+                               unsigned int offset,
+                               unsigned int length)
 {
        struct buffer_head *head, *bh, *next;
+       unsigned int stop = offset + length;
        unsigned int curr_off = 0;
+       int partial_page = (offset || length < PAGE_CACHE_SIZE);
        int may_free = 1;
        int ret = 0;
 
@@ -2054,6 +2038,8 @@ int jbd2_journal_invalidatepage(journal_t *journal,
        if (!page_has_buffers(page))
                return 0;
 
+       BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
+
        /* We will potentially be playing with lists other than just the
         * data lists (especially for journaled data mode), so be
         * cautious in our locking. */
@@ -2063,10 +2049,13 @@ int jbd2_journal_invalidatepage(journal_t *journal,
                unsigned int next_off = curr_off + bh->b_size;
                next = bh->b_this_page;
 
+               if (next_off > stop)
+                       return 0;
+
                if (offset <= curr_off) {
                        /* This block is wholly outside the truncation point */
                        lock_buffer(bh);
-                       ret = journal_unmap_buffer(journal, bh, offset > 0);
+                       ret = journal_unmap_buffer(journal, bh, partial_page);
                        unlock_buffer(bh);
                        if (ret < 0)
                                return ret;
@@ -2077,7 +2066,7 @@ int jbd2_journal_invalidatepage(journal_t *journal,
 
        } while (bh != head);
 
-       if (!offset) {
+       if (!partial_page) {
                if (may_free && try_to_free_buffers(page))
                        J_ASSERT(!page_has_buffers(page));
        }
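
Taken together, the invalidatepage changes switch from "everything after
offset" to an explicit (offset, length) range: stop marks the end of the range,
the walk bails out at the first buffer that extends past it, and buffers are
only unmapped when they start at or after offset. try_to_free_buffers() still
runs only when the whole page is gone, which is what the new partial_page flag
tracks. A small user-space model of that walk, using assumed page and block
sizes (DEMO_PAGE_SIZE, DEMO_BLOCK_SIZE) and a printf in place of
journal_unmap_buffer():

	#include <stdbool.h>
	#include <stdio.h>

	#define DEMO_PAGE_SIZE	4096u	/* stands in for PAGE_CACHE_SIZE */
	#define DEMO_BLOCK_SIZE	1024u	/* assumed buffer (block) size */

	int main(void)
	{
		unsigned int offset = 1024, length = 2048;	/* example range */
		unsigned int stop = offset + length;
		bool partial_page = (offset || length < DEMO_PAGE_SIZE);
		unsigned int curr_off;

		for (curr_off = 0; curr_off < DEMO_PAGE_SIZE; curr_off += DEMO_BLOCK_SIZE) {
			unsigned int next_off = curr_off + DEMO_BLOCK_SIZE;

			if (next_off > stop)		/* buffer extends past the range */
				break;
			if (offset <= curr_off)		/* buffer is wholly inside the range */
				printf("unmap buffer at %u..%u\n", curr_off, next_off);
		}

		if (!partial_page)
			printf("whole page invalidated, buffers may be freed\n");
		else
			printf("partial page (offset=%u length=%u), keep the buffers\n",
			       offset, length);
		return 0;
	}
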
@@ -2138,15 +2127,9 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
        case BJ_Forget:
                list = &transaction->t_forget;
                break;
-       case BJ_IO:
-               list = &transaction->t_iobuf_list;
-               break;
        case BJ_Shadow:
                list = &transaction->t_shadow_list;
                break;
-       case BJ_LogCtl:
-               list = &transaction->t_log_list;
-               break;
        case BJ_Reserved:
                list = &transaction->t_reserved_list;
                break;