drm/i915: Update rules for writing through the LLC with the cpu
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b98c3b0e5a02970647d61093412bc069ec040503..54d76e9392d8825672f232c323682a7cd803b74f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -37,7 +37,8 @@
 #include <linux/dma-buf.h>
 
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
-static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
+static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+                                                  bool force);
 static __must_check int
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
                           struct i915_address_space *vm,
@@ -67,6 +68,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev,
        return HAS_LLC(dev) || level != I915_CACHE_NONE;
 }
 
+static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+       if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+               return true;
+
+       return obj->pin_display;
+}
+
 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
 {
        if (obj->tiling_mode)
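
As a standalone sketch of the policy the new helper encodes (hypothetical reduced types, not the kernel source itself): a CPU write must be followed by a clflush when the CPU cache is not coherent with the GPU's view, and also when the object is pinned for scanout, because the display engine reads directly from memory and never snoops the LLC.

	#include <stdbool.h>

	enum cache_level { CACHE_NONE, CACHE_LLC };

	struct obj_state {
		bool has_llc;            /* platform has a shared last-level cache */
		enum cache_level level;  /* current caching mode of the object */
		bool pin_display;        /* object is pinned as a scanout buffer */
	};

	static bool cache_is_coherent(const struct obj_state *o)
	{
		/* Mirrors cpu_cache_is_coherent(): LLC platforms snoop every
		 * access; otherwise only snooped cache levels are coherent. */
		return o->has_llc || o->level != CACHE_NONE;
	}

	static bool write_needs_clflush(const struct obj_state *o)
	{
		if (!cache_is_coherent(o))
			return true;
		/* Even on an LLC platform a scanout buffer must be flushed out
		 * to memory, as the display engine does not snoop the cache. */
		return o->pin_display;
	}
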
@@ -742,8 +751,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
                 * write domain and manually flush cachelines (if required). This
                 * optimizes for the case when the gpu will use the data
                 * right away and we therefore have to clflush anyway. */
-               if (obj->cache_level == I915_CACHE_NONE)
-                       needs_clflush_after = 1;
+               needs_clflush_after = cpu_write_needs_clflush(obj);
                if (i915_gem_obj_bound_any(obj)) {
                        ret = i915_gem_object_set_to_gtt_domain(obj, true);
                        if (ret)
@@ -833,7 +841,7 @@ out:
                 */
                if (!needs_clflush_after &&
                    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
-                       i915_gem_clflush_object(obj);
+                       i915_gem_clflush_object(obj, obj->pin_display);
                        i915_gem_chipset_flush(dev);
                }
        }
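
A reduced sketch of the tail flush in the hunk above (hypothetical standalone form, reusing the predicate sketched earlier): if no flush was planned up front and the write bypassed the tracked CPU write domain, flush now, forcing writeback past the LLC only for scanout buffers.

	#include <stdbool.h>

	/* Stand-in for i915_gem_clflush_object() + i915_gem_chipset_flush(). */
	static void flush_object(bool force_llc_writeback)
	{
		/* ... clflush cachelines, then flush the chipset write buffer ... */
	}

	static void pwrite_tail_flush(bool needs_clflush_after,
				      bool in_cpu_write_domain,
				      bool pin_display)
	{
		/* Mirrors the out: path above: an untracked write is flushed,
		 * and the LLC is written back only when the buffer is also a
		 * scanout target. */
		if (!needs_clflush_after && !in_cpu_write_domain)
			flush_object(pin_display);
	}
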
@@ -911,9 +919,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
                goto out;
        }
 
-       if (obj->cache_level == I915_CACHE_NONE &&
-           obj->tiling_mode == I915_TILING_NONE &&
-           obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+       if (obj->tiling_mode == I915_TILING_NONE &&
+           obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
+           cpu_write_needs_clflush(obj)) {
                ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
                /* Note that the gtt paths might fail with non-page-backed user
                 * pointers (e.g. gtt mappings when moving data between
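
The fast-path selection now shares the same predicate; a minimal sketch (hypothetical names): the GTT write path is attractive exactly when a CPU write would need a clflush anyway, so going through the aperture costs nothing extra.

	#include <stdbool.h>

	/* needs_clflush: result of cpu_write_needs_clflush(), as sketched
	 * after the helper's hunk above. */
	static bool use_gtt_pwrite_fast(bool tiled, bool in_cpu_write_domain,
					bool needs_clflush)
	{
		/* Linear, non-CPU-domain objects that would need a flush for a
		 * CPU write are better served by writing through the GTT. */
		return !tiled && !in_cpu_write_domain && needs_clflush;
	}
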
@@ -1262,8 +1270,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
        }
 
        /* Pinned buffers may be scanout, so flush the cache */
-       if (obj->pin_count)
-               i915_gem_object_flush_cpu_write_domain(obj);
+       if (obj->pin_display)
+               i915_gem_object_flush_cpu_write_domain(obj, true);
 
        drm_gem_object_unreference(&obj->base);
 unlock:
@@ -1640,7 +1648,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
                 * hope for the best.
                 */
                WARN_ON(ret != -EIO);
-               i915_gem_clflush_object(obj);
+               i915_gem_clflush_object(obj, true);
                obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        }
 
@@ -3217,7 +3225,8 @@ err_unpin:
 }
 
 void
-i915_gem_clflush_object(struct drm_i915_gem_object *obj)
+i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+                       bool force)
 {
        /* If we don't have a page list set up, then we're not pinned
         * to GPU, and we can ignore the cache flush because it'll happen
@@ -3241,7 +3250,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
         * snooping behaviour occurs naturally as the result of our domain
         * tracking.
         */
-       if (obj->cache_level != I915_CACHE_NONE)
+       if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
                return;
 
        trace_i915_gem_object_clflush(obj);
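
The force flag's semantics, as a standalone sketch (hypothetical reduced form): without force, coherent objects skip the flush entirely; force overrides the check for consumers such as the display engine that do not snoop the LLC.

	#include <stdbool.h>

	static bool should_clflush(bool has_pages, bool force, bool coherent)
	{
		if (!has_pages)
			return false;   /* nothing resident to flush */
		if (!force && coherent)
			return false;   /* snooping keeps the caches consistent */
		return true;            /* flush, e.g. scanout on an LLC machine */
	}
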
@@ -3278,14 +3287,15 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
 
 /** Flushes the CPU write domain for the object if it's dirty. */
 static void
-i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
+i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+                                      bool force)
 {
        uint32_t old_write_domain;
 
        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
                return;
 
-       i915_gem_clflush_object(obj);
+       i915_gem_clflush_object(obj, force);
        i915_gem_chipset_flush(obj->base.dev);
        old_write_domain = obj->base.write_domain;
        obj->base.write_domain = 0;
@@ -3319,7 +3329,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        if (ret)
                return ret;
 
-       i915_gem_object_flush_cpu_write_domain(obj);
+       i915_gem_object_flush_cpu_write_domain(obj, false);
 
        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
@@ -3409,7 +3419,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                               obj, cache_level);
        }
 
-       if (cache_level == I915_CACHE_NONE) {
+       list_for_each_entry(vma, &obj->vma_list, vma_link)
+               vma->node.color = cache_level;
+       obj->cache_level = cache_level;
+
+       if (cpu_write_needs_clflush(obj)) {
                u32 old_read_domains, old_write_domain;
 
                /* If we're coming from LLC cached, then we haven't
@@ -3432,9 +3446,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
                                                    old_write_domain);
        }
 
-       list_for_each_entry(vma, &obj->vma_list, vma_link)
-               vma->node.color = cache_level;
-       obj->cache_level = cache_level;
        i915_gem_verify_gtt(dev);
        return 0;
 }
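
The reordering in these two hunks matters because cpu_write_needs_clflush() reads obj->cache_level: the new level (and the vma node colors) must be committed before the flush decision is taken. A reduced sketch (hypothetical form, coherency test simplified):

	#include <stdbool.h>

	struct sketch_obj {
		int cache_level;     /* 0 stands in for I915_CACHE_NONE */
		bool pin_display;
	};

	static bool needs_flush(const struct sketch_obj *o)
	{
		/* Simplified cpu_write_needs_clflush(): uncached or scanout.
		 * (The real check also treats LLC platforms as coherent.) */
		return o->cache_level == 0 || o->pin_display;
	}

	static void set_cache_level(struct sketch_obj *o, int level)
	{
		o->cache_level = level;   /* commit the new level first ... */
		if (needs_flush(o)) {
			/* ... so the decision sees it: move the object to the
			 * GTT domain and drop the CPU read domain here. */
		}
	}
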
@@ -3562,7 +3573,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
        if (ret)
                goto err_unpin_display;
 
-       i915_gem_object_flush_cpu_write_domain(obj);
+       i915_gem_object_flush_cpu_write_domain(obj, true);
 
        old_write_domain = obj->base.write_domain;
        old_read_domains = obj->base.read_domains;
@@ -3634,8 +3645,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
        /* Flush the CPU cache if it's still invalid. */
        if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-               if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
-                       i915_gem_clflush_object(obj);
+               i915_gem_clflush_object(obj, false);
 
                obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
        }
@@ -3817,10 +3827,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
        obj->user_pin_count++;
        obj->pin_filp = file;
 
-       /* XXX - flush the CPU caches for pinned objects
-        * as the X server doesn't manage domains yet
-        */
-       i915_gem_object_flush_cpu_write_domain(obj);
        args->offset = i915_gem_obj_ggtt_offset(obj);
 out:
        drm_gem_object_unreference(&obj->base);