X-Git-Url: http://pileus.org/git/?a=blobdiff_plain;ds=sidebyside;f=mm%2Fpage-writeback.c;h=5e00f1772c20fc6d9e4e70a087800e93b3ae3df2;hb=d684b21f89b96af3adc06877f29fd9f5214b23c8;hp=4073d531cd7b6ad8486330f02edcea9a16e9d5e7;hpb=3e26c149c358529b1605f8959341d34bc4b880a3;p=~andy%2Flinux

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4073d531cd7..5e00f1772c2 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -37,7 +37,7 @@
 
 /*
  * The maximum number of pages to writeout in a single bdflush/kupdate
- * operation.  We do this so we don't hold I_LOCK against an inode for
+ * operation.  We do this so we don't hold I_SYNC against an inode for
  * enormous amounts of time, which would block a userspace task which has
  * been forced to throttle against that inode.  Also, the code reevaluates
  * the dirty each time it has written this many pages.
@@ -68,6 +68,12 @@ static inline long sync_writeback_pages(void)
  */
 int dirty_background_ratio = 5;
 
+/*
+ * free highmem will not be subtracted from the total free memory
+ * for calculating free ratios if vm_highmem_is_dirtyable is true
+ */
+int vm_highmem_is_dirtyable;
+
 /*
  * The generator of dirty data starts writeback at this percentage
  */
@@ -219,7 +225,7 @@ static inline void task_dirties_fraction(struct task_struct *tsk,
  *
  *   dirty -= (dirty/8) * p_{t}
  */
-void task_dirty_limit(struct task_struct *tsk, long *pdirty)
+static void task_dirty_limit(struct task_struct *tsk, long *pdirty)
 {
 	long numerator, denominator;
 	long dirty = *pdirty;
@@ -287,7 +293,10 @@ static unsigned long determine_dirtyable_memory(void)
 	x = global_page_state(NR_FREE_PAGES)
 		+ global_page_state(NR_INACTIVE)
 		+ global_page_state(NR_ACTIVE);
-	x -= highmem_dirtyable_memory(x);
+
+	if (!vm_highmem_is_dirtyable)
+		x -= highmem_dirtyable_memory(x);
+
 	return x + 1;	/* Ensure that we never return 0 */
 }
 
@@ -297,20 +306,12 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
 {
 	int background_ratio;		/* Percentages */
 	int dirty_ratio;
-	int unmapped_ratio;
 	long background;
 	long dirty;
 	unsigned long available_memory = determine_dirtyable_memory();
 	struct task_struct *tsk;
 
-	unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) +
-				global_page_state(NR_ANON_PAGES)) * 100) /
-					available_memory;
-
 	dirty_ratio = vm_dirty_ratio;
-	if (dirty_ratio > unmapped_ratio / 2)
-		dirty_ratio = unmapped_ratio / 2;
-
 	if (dirty_ratio < 5)
 		dirty_ratio = 5;
 
@@ -355,8 +356,8 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
  */
 static void balance_dirty_pages(struct address_space *mapping)
 {
-	long bdi_nr_reclaimable;
-	long bdi_nr_writeback;
+	long nr_reclaimable, bdi_nr_reclaimable;
+	long nr_writeback, bdi_nr_writeback;
 	long background_thresh;
 	long dirty_thresh;
 	long bdi_thresh;
@@ -376,11 +377,26 @@ static void balance_dirty_pages(struct address_space *mapping)
 
 		get_dirty_limits(&background_thresh, &dirty_thresh,
 				&bdi_thresh, bdi);
+
+		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
+					global_page_state(NR_UNSTABLE_NFS);
+		nr_writeback = global_page_state(NR_WRITEBACK);
+
 		bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
 		bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
+
 		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
 			break;
 
+		/*
+		 * Throttle it only when the background writeback cannot
+		 * catch-up. This avoids (excessively) small writeouts
+		 * when the bdi limits are ramping up.
+		 */
+		if (nr_reclaimable + nr_writeback <
+				(background_thresh + dirty_thresh) / 2)
+			break;
+
 		if (!bdi->dirty_exceeded)
 			bdi->dirty_exceeded = 1;
 
@@ -502,16 +518,6 @@ void throttle_vm_writeout(gfp_t gfp_mask)
 	long background_thresh;
 	long dirty_thresh;
 
-	if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO)) {
-		/*
-		 * The caller might hold locks which can prevent IO completion
-		 * or progress in the filesystem.  So we cannot just sit here
-		 * waiting for IO to complete.
-		 */
-		congestion_wait(WRITE, HZ/10);
-		return;
-	}
-
 	for ( ; ; ) {
 		get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
 
@@ -525,6 +531,14 @@ void throttle_vm_writeout(gfp_t gfp_mask)
 			global_page_state(NR_WRITEBACK) <= dirty_thresh)
 				break;
 		congestion_wait(WRITE, HZ/10);
+
+		/*
+		 * The caller might hold locks which can prevent IO completion
+		 * or progress in the filesystem.  So we cannot just sit here
+		 * waiting for IO to complete.
+		 */
+		if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO))
+			break;
 	}
 }
 
@@ -553,6 +567,7 @@ static void background_writeout(unsigned long _min_pages)
 			global_page_state(NR_UNSTABLE_NFS) < background_thresh
 				&& min_pages <= 0)
 			break;
+		wbc.more_io = 0;
 		wbc.encountered_congestion = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
@@ -560,8 +575,9 @@ static void background_writeout(unsigned long _min_pages)
 		min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
 			/* Wrote less than expected */
-			congestion_wait(WRITE, HZ/10);
-			if (!wbc.encountered_congestion)
+			if (wbc.encountered_congestion || wbc.more_io)
+				congestion_wait(WRITE, HZ/10);
+			else
 				break;
 		}
 	}
@@ -626,11 +642,12 @@ static void wb_kupdate(unsigned long arg)
 			global_page_state(NR_UNSTABLE_NFS) +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 	while (nr_to_write > 0) {
+		wbc.more_io = 0;
 		wbc.encountered_congestion = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		writeback_inodes(&wbc);
 		if (wbc.nr_to_write > 0) {
-			if (wbc.encountered_congestion)
+			if (wbc.encountered_congestion || wbc.more_io)
 				congestion_wait(WRITE, HZ/10);
 			else
 				break;	/* All the old data is written */
@@ -849,8 +866,10 @@ retry:
 
 			ret = (*writepage)(page, wbc, data);
 
-			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
+			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
 				unlock_page(page);
+				ret = 0;
+			}
 			if (ret || (--(wbc->nr_to_write) <= 0))
 				done = 1;
 			if (wbc->nonblocking && bdi_write_congested(bdi)) {
@@ -986,7 +1005,7 @@ int __set_page_dirty_no_writeback(struct page *page)
  * mapping is pinned by the vma's ->vm_file reference.
  *
  * We take care to handle the case where the page was truncated from the
- * mapping by re-checking page_mapping() insode tree_lock.
+ * mapping by re-checking page_mapping() inside tree_lock.
  */
 int __set_page_dirty_nobuffers(struct page *page)
 {
@@ -1057,7 +1076,7 @@ static int __set_page_dirty(struct page *page)
 	return 0;
 }
 
-int fastcall set_page_dirty(struct page *page)
+int set_page_dirty(struct page *page)
 {
 	int ret = __set_page_dirty(page);
 	if (ret)
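The behavioral core of the balance_dirty_pages() hunk at -376 above is the second break: a task that has pushed its backing device past bdi_thresh is throttled only if the system-wide dirty total has also crossed the midpoint of background_thresh and dirty_thresh, so background writeback gets a chance to catch up on its own and excessively small writeouts are avoided while the per-bdi limits ramp up. The following is a minimal standalone userspace sketch of just that decision; the dirty_state struct, the should_throttle() helper, and the sample page counts are illustrative assumptions, not kernel code or API.

/*
 * Userspace sketch (illustrative only) of the throttling decision in the
 * patched balance_dirty_pages() loop.  Build with: gcc -Wall -std=c99 sketch.c
 */
#include <stdio.h>
#include <stdbool.h>

/* Hypothetical snapshot of the global and per-bdi dirty counters. */
struct dirty_state {
	long nr_reclaimable;		/* NR_FILE_DIRTY + NR_UNSTABLE_NFS */
	long nr_writeback;		/* NR_WRITEBACK */
	long bdi_nr_reclaimable;	/* BDI_RECLAIMABLE for this device */
	long bdi_nr_writeback;		/* BDI_WRITEBACK for this device */
};

static bool should_throttle(const struct dirty_state *s,
			    long background_thresh, long dirty_thresh,
			    long bdi_thresh)
{
	/* First break in the loop: under the per-bdi limit, never throttle. */
	if (s->bdi_nr_reclaimable + s->bdi_nr_writeback <= bdi_thresh)
		return false;

	/*
	 * Second (new) break: over the bdi limit, but the global total is
	 * still below the midpoint of the background and hard thresholds,
	 * so background writeback can catch up without blocking the task.
	 */
	if (s->nr_reclaimable + s->nr_writeback <
			(background_thresh + dirty_thresh) / 2)
		return false;

	return true;			/* throttle: enter the writeout loop */
}

int main(void)
{
	/* Made-up thresholds: 5% background, 10% hard, of 100000 pages. */
	long background_thresh = 5000, dirty_thresh = 10000, bdi_thresh = 2000;

	/* bdi over its limit, but the system globally well below the
	 * midpoint (7500): the ramp-up case the patch stops throttling. */
	struct dirty_state ramping = {
		.nr_reclaimable = 3000, .nr_writeback = 1000,
		.bdi_nr_reclaimable = 1800, .bdi_nr_writeback = 400,
	};

	/* Same bdi state, but the system as a whole is saturated. */
	struct dirty_state saturated = {
		.nr_reclaimable = 6000, .nr_writeback = 3000,
		.bdi_nr_reclaimable = 1800, .bdi_nr_writeback = 400,
	};

	printf("ramping-up bdi:   throttle? %s\n",
	       should_throttle(&ramping, background_thresh, dirty_thresh,
			       bdi_thresh) ? "yes" : "no");
	printf("saturated system: throttle? %s\n",
	       should_throttle(&saturated, background_thresh, dirty_thresh,
			       bdi_thresh) ? "yes" : "no");
	return 0;
}

With these sample numbers the first case prints "no" (2200 > 2000 on the bdi, but 4000 < 7500 globally) and the second prints "yes" (9000 >= 7500), matching the order of the two break statements in the hunk.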