X-Git-Url: http://pileus.org/git/?a=blobdiff_plain;f=mm%2Fpage-writeback.c;h=8d9b19f239c3ec03038e88cdd0a44e8a71c0d3e9;hb=d7a5c32442ed3d528b9ddfd3d5b837bad0ffa9da;hp=77a0bc4e261aba1a9290a389f99f299251cc38c8;hpb=1aedf2ccc60fade26c46fae12e28664d0da3f199;p=~andy%2Flinux diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 77a0bc4e261..8d9b19f239c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1,5 +1,5 @@ /* - * mm/page-writeback.c. + * mm/page-writeback.c * * Copyright (C) 2002, Linus Torvalds. * @@ -23,12 +23,15 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include /* * The maximum number of pages to writeout in a single bdflush/kupdate @@ -45,7 +48,6 @@ */ static long ratelimit_pages = 32; -static long total_pages; /* The total number of pages in the machine. */ static int dirty_exceeded __cacheline_aligned_in_smp; /* Dirty mem may be over limit */ /* @@ -125,7 +127,7 @@ get_dirty_limits(long *pbackground, long *pdirty, int unmapped_ratio; long background; long dirty; - unsigned long available_memory = total_pages; + unsigned long available_memory = vm_total_pages; struct task_struct *tsk; #ifdef CONFIG_HIGHMEM @@ -140,7 +142,7 @@ get_dirty_limits(long *pbackground, long *pdirty, unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) + global_page_state(NR_ANON_PAGES)) * 100) / - total_pages; + vm_total_pages; dirty_ratio = vm_dirty_ratio; if (dirty_ratio > unmapped_ratio / 2) @@ -220,7 +222,7 @@ static void balance_dirty_pages(struct address_space *mapping) if (pages_written >= write_chunk) break; /* We've done our duty */ } - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); } if (nr_reclaimable + global_page_state(NR_WRITEBACK) @@ -243,6 +245,16 @@ static void balance_dirty_pages(struct address_space *mapping) pdflush_operation(background_writeout, 0); } +void set_page_dirty_balance(struct page *page) +{ + if (set_page_dirty(page)) { + struct address_space *mapping = page_mapping(page); + + if (mapping) + balance_dirty_pages_ratelimited(mapping); + } +} + /** * balance_dirty_pages_ratelimited_nr - balance dirty memory state * @mapping: address_space which was dirtied @@ -302,7 +314,7 @@ void throttle_vm_writeout(void) if (global_page_state(NR_UNSTABLE_NFS) + global_page_state(NR_WRITEBACK) <= dirty_thresh) break; - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); } } @@ -339,7 +351,7 @@ static void background_writeout(unsigned long _min_pages) min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { /* Wrote less than expected */ - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); if (!wbc.encountered_congestion) break; } @@ -410,7 +422,7 @@ static void wb_kupdate(unsigned long arg) writeback_inodes(&wbc); if (wbc.nr_to_write > 0) { if (wbc.encountered_congestion) - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); else break; /* All the old data is written */ } @@ -491,9 +503,9 @@ void laptop_sync_completion(void) * will write six megabyte chunks, max. */ -static void set_ratelimit(void) +void writeback_set_ratelimit(void) { - ratelimit_pages = total_pages / (num_online_cpus() * 32); + ratelimit_pages = vm_total_pages / (num_online_cpus() * 32); if (ratelimit_pages < 16) ratelimit_pages = 16; if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024) @@ -503,7 +515,7 @@ static void set_ratelimit(void) static int __cpuinit ratelimit_handler(struct notifier_block *self, unsigned long u, void *v) { - set_ratelimit(); + writeback_set_ratelimit(); return 0; } @@ -522,9 +534,7 @@ void __init page_writeback_init(void) long buffer_pages = nr_free_buffer_pages(); long correction; - total_pages = nr_free_pagecache_pages(); - - correction = (100 * 4 * buffer_pages) / total_pages; + correction = (100 * 4 * buffer_pages) / vm_total_pages; if (correction < 100) { dirty_background_ratio *= correction; @@ -538,10 +548,143 @@ void __init page_writeback_init(void) vm_dirty_ratio = 1; } mod_timer(&wb_timer, jiffies + dirty_writeback_interval); - set_ratelimit(); + writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); } +/** + * generic_writepages - walk the list of dirty pages of the given + * address space and writepage() all of them. + * + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * + * This is a library function, which implements the writepages() + * address_space_operation. + * + * If a page is already under I/O, generic_writepages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. + * + * Derived from mpage_writepages() - if you fix this you should check that + * also! + */ +int generic_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + int (*writepage)(struct page *page, struct writeback_control *wbc); + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + writepage = mapping->a_ops->writepage; + + /* deal with chardevs and other special file */ + if (!writepage) + return 0; + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc); + if (ret) { + if (ret == -ENOSPC) + set_bit(AS_ENOSPC, &mapping->flags); + else + set_bit(AS_EIO, &mapping->flags); + } + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) + unlock_page(page); + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} + +EXPORT_SYMBOL(generic_writepages); + int do_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret; @@ -550,7 +693,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) return 0; wbc->for_writepages = 1; if (mapping->a_ops->writepages) - ret = mapping->a_ops->writepages(mapping, wbc); + ret = mapping->a_ops->writepages(mapping, wbc); else ret = generic_writepages(mapping, wbc); wbc->for_writepages = 0; @@ -664,9 +807,11 @@ int fastcall set_page_dirty(struct page *page) if (likely(mapping)) { int (*spd)(struct page *) = mapping->a_ops->set_page_dirty; - if (spd) - return (*spd)(page); - return __set_page_dirty_buffers(page); +#ifdef CONFIG_BLOCK + if (!spd) + spd = __set_page_dirty_buffers; +#endif + return (*spd)(page); } if (!PageDirty(page)) { if (!TestSetPageDirty(page)) @@ -690,7 +835,7 @@ int set_page_dirty_lock(struct page *page) { int ret; - lock_page(page); + lock_page_nosync(page); ret = set_page_dirty(page); unlock_page(page); return ret; @@ -712,9 +857,15 @@ int test_clear_page_dirty(struct page *page) radix_tree_tag_clear(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); - if (mapping_cap_account_dirty(mapping)) - __dec_zone_page_state(page, NR_FILE_DIRTY); write_unlock_irqrestore(&mapping->tree_lock, flags); + /* + * We can continue to use `mapping' here because the + * page is locked, which pins the address_space + */ + if (mapping_cap_account_dirty(mapping)) { + page_mkclean(page); + dec_zone_page_state(page, NR_FILE_DIRTY); + } return 1; } write_unlock_irqrestore(&mapping->tree_lock, flags); @@ -744,8 +895,10 @@ int clear_page_dirty_for_io(struct page *page) if (mapping) { if (TestClearPageDirty(page)) { - if (mapping_cap_account_dirty(mapping)) + if (mapping_cap_account_dirty(mapping)) { + page_mkclean(page); dec_zone_page_state(page, NR_FILE_DIRTY); + } return 1; } return 0; @@ -802,15 +955,6 @@ int test_set_page_writeback(struct page *page) } EXPORT_SYMBOL(test_set_page_writeback); -/* - * Wakes up tasks that are being throttled due to writeback congestion - */ -void writeback_congestion_end(void) -{ - blk_congestion_end(WRITE); -} -EXPORT_SYMBOL(writeback_congestion_end); - /* * Return true if any of the pages in the mapping are marged with the * passed tag.