ext4_lblk_t end);
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
int nr_to_scan);
+static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+ struct ext4_inode_info *locked_ei);
int __init ext4_init_es(void)
{
if (tree->cache_es) {
es1 = tree->cache_es;
if (in_range(lblk, es1->es_lblk, es1->es_len)) {
- es_debug("%u cached by [%u/%u) %llu %llx\n",
+ es_debug("%u cached by [%u/%u) %llu %x\n",
lblk, es1->es_lblk, es1->es_len,
ext4_es_pblock(es1), ext4_es_status(es1));
goto out;
unsigned short ee_len;
int depth, ee_status, es_status;
- path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
+ path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
if (IS_ERR(path))
return;
*/
int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
- unsigned long long status)
+ unsigned int status)
{
struct extent_status newes;
ext4_lblk_t end = lblk + len - 1;
int err = 0;
- es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n",
+ es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
lblk, len, pblk, status, inode->i_ino);
if (!len)
err = __es_remove_extent(inode, lblk, end);
if (err != 0)
goto error;
+retry:
err = __es_insert_extent(inode, &newes);
+ if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
+ EXT4_I(inode)))
+ goto retry;
+ if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
+ err = 0;
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
return err;
}
+/*
+ * ext4_es_cache_extent() inserts information into the extent status
+ * tree if and only if there isn't information about the range in
+ * question already.
+ */
+void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
+ ext4_lblk_t len, ext4_fsblk_t pblk,
+ unsigned int status)
+{
+ struct extent_status *es;
+ struct extent_status newes;
+ ext4_lblk_t end = lblk + len - 1;
+
+ newes.es_lblk = lblk;
+ newes.es_len = len;
+ ext4_es_store_pblock(&newes, pblk);
+ ext4_es_store_status(&newes, status);
+ trace_ext4_es_cache_extent(inode, &newes);
+
+ if (!len)
+ return;
+
+ BUG_ON(end < lblk);
+
+ write_lock(&EXT4_I(inode)->i_es_lock);
+
+ es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
+ if (!es || es->es_lblk > end)
+ __es_insert_extent(inode, &newes);
+ write_unlock(&EXT4_I(inode)->i_es_lock);
+}
+
/*
* ext4_es_lookup_extent() looks up an extent in extent status tree.
*
struct extent_status orig_es;
ext4_lblk_t len1, len2;
ext4_fsblk_t block;
- int err = 0;
+ int err;
+retry:
+ err = 0;
es = __es_tree_search(&tree->root, lblk);
if (!es)
goto out;
if (err) {
es->es_lblk = orig_es.es_lblk;
es->es_len = orig_es.es_len;
+ if ((err == -ENOMEM) &&
+ __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
+ EXT4_I(inode)))
+ goto retry;
goto out;
}
} else {
eia = list_entry(a, struct ext4_inode_info, i_es_lru);
eib = list_entry(b, struct ext4_inode_info, i_es_lru);
+ if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
+ !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
+ return 1;
+ if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
+ ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
+ return -1;
if (eia->i_touch_when == eib->i_touch_when)
return 0;
if (time_after(eia->i_touch_when, eib->i_touch_when))
return -1;
}
-static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
+static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+ struct ext4_inode_info *locked_ei)
{
- struct ext4_sb_info *sbi = container_of(shrink,
- struct ext4_sb_info, s_es_shrinker);
struct ext4_inode_info *ei;
struct list_head *cur, *tmp;
- LIST_HEAD(skiped);
- int nr_to_scan = sc->nr_to_scan;
+ LIST_HEAD(skipped);
int ret, nr_shrunk = 0;
-
- ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
- trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
-
- if (!nr_to_scan)
- return ret;
+ int retried = 0, skip_precached = 1, nr_skipped = 0;
spin_lock(&sbi->s_es_lru_lock);
- /*
- * If the inode that is at the head of LRU list is newer than
- * last_sorted time, that means that we need to sort this list.
- */
- ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru);
- if (sbi->s_es_last_sorted < ei->i_touch_when) {
- list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
- sbi->s_es_last_sorted = jiffies;
- }
-
+retry:
list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
/*
* If we have already reclaimed all extents from extent
ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
- /* Skip the inode that is newer than the last_sorted time */
- if (sbi->s_es_last_sorted < ei->i_touch_when) {
- list_move_tail(cur, &skiped);
+ /*
+ * Skip the inode that is newer than the last_sorted
+ * time. Normally we try hard to avoid shrinking
+ * precached inodes, but we will as a last resort.
+ */
+ if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
+ (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
+ EXT4_STATE_EXT_PRECACHED))) {
+ nr_skipped++;
+ list_move_tail(cur, &skipped);
continue;
}
- if (ei->i_es_lru_nr == 0)
+ if (ei->i_es_lru_nr == 0 || ei == locked_ei)
continue;
write_lock(&ei->i_es_lock);
}
/* Move the newer inodes into the tail of the LRU list. */
- list_splice_tail(&skiped, &sbi->s_es_lru);
+ list_splice_tail(&skipped, &sbi->s_es_lru);
+ INIT_LIST_HEAD(&skipped);
+
+ /*
+ * If we skipped any inodes, and we weren't able to make any
+ * forward progress, sort the list and try again.
+ */
+ if ((nr_shrunk == 0) && nr_skipped && !retried) {
+ retried++;
+ list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
+ sbi->s_es_last_sorted = jiffies;
+ ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
+ i_es_lru);
+ /*
+ * If there are no non-precached inodes left on the
+ * list, start releasing precached extents.
+ */
+ if (ext4_test_inode_state(&ei->vfs_inode,
+ EXT4_STATE_EXT_PRECACHED))
+ skip_precached = 0;
+ goto retry;
+ }
+
spin_unlock(&sbi->s_es_lru_lock);
+ if (locked_ei && nr_shrunk == 0)
+ nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
+
+ return nr_shrunk;
+}
+
+static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct ext4_sb_info *sbi = container_of(shrink,
+ struct ext4_sb_info, s_es_shrinker);
+ int nr_to_scan = sc->nr_to_scan;
+ int ret, nr_shrunk;
+
+ ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
+ trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
+
+ if (!nr_to_scan)
+ return ret;
+
+ nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
+
ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
return ret;
struct rb_node *node;
struct extent_status *es;
int nr_shrunk = 0;
+ static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
if (ei->i_es_lru_nr == 0)
return 0;
+ if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
+ __ratelimit(&_rs))
+ ext4_warning(inode->i_sb, "forced shrink of precached extents");
+
node = rb_first(&tree->root);
while (node != NULL) {
es = rb_entry(node, struct extent_status, rb_node);