drm/i915: Clear fence register on tiling stride change.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 670d12881468297e8e0af0fe40222c3f54f5cb2f..174aef2d64815feddc14cdc7fea57f1ae602dfc6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,7 +46,6 @@ static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *o
 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
                                           unsigned alignment);
-static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write);
 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
 static int i915_gem_evict_something(struct drm_device *dev);
 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
@@ -989,10 +988,10 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
                return -ENODEV;
 
        /* Only handle setting domains to types used by the CPU. */
-       if (write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+       if (write_domain & I915_GEM_GPU_DOMAINS)
                return -EINVAL;
 
-       if (read_domains & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+       if (read_domains & I915_GEM_GPU_DOMAINS)
                return -EINVAL;
 
        /* Having something in the write domain implies it's in the read
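
For reference, the two hunks above are a pure simplification. In this era's headers (quoted from memory, so treat the exact spellings as assumptions), include/drm/i915_drm.h defines the domain bits and i915_drv.h defines I915_GEM_GPU_DOMAINS as the complement of the CPU and GTT bits, so masking with it is bit-for-bit identical to the old ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT) test:

	#define I915_GEM_DOMAIN_CPU		0x00000001
	#define I915_GEM_DOMAIN_RENDER		0x00000002
	#define I915_GEM_DOMAIN_SAMPLER		0x00000004
	#define I915_GEM_DOMAIN_COMMAND		0x00000008
	#define I915_GEM_DOMAIN_INSTRUCTION	0x00000010
	#define I915_GEM_DOMAIN_VERTEX		0x00000020
	#define I915_GEM_DOMAIN_GTT		0x00000040

	/* Everything that is not CPU- or GTT-visible is a GPU cache domain. */
	#define I915_GEM_GPU_DOMAINS	(~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
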
@@ -1158,7 +1157,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        /* Need a new fence register? */
        if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
            obj_priv->tiling_mode != I915_TILING_NONE) {
-               ret = i915_gem_object_get_fence_reg(obj, write);
+               ret = i915_gem_object_get_fence_reg(obj);
                if (ret) {
                        mutex_unlock(&dev->struct_mutex);
                        return VM_FAULT_SIGBUS;
@@ -1481,14 +1480,19 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
  * Returned sequence numbers are nonzero on success.
  */
 static uint32_t
-i915_add_request(struct drm_device *dev, uint32_t flush_domains)
+i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
+                uint32_t flush_domains)
 {
        drm_i915_private_t *dev_priv = dev->dev_private;
+       struct drm_i915_file_private *i915_file_priv = NULL;
        struct drm_i915_gem_request *request;
        uint32_t seqno;
        int was_empty;
        RING_LOCALS;
 
+       if (file_priv != NULL)
+               i915_file_priv = file_priv->driver_priv;
+
        request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER);
        if (request == NULL)
                return 0;
@@ -1515,6 +1519,12 @@ i915_add_request(struct drm_device *dev, uint32_t flush_domains)
        request->emitted_jiffies = jiffies;
        was_empty = list_empty(&dev_priv->mm.request_list);
        list_add_tail(&request->list, &dev_priv->mm.request_list);
+       if (i915_file_priv) {
+               list_add_tail(&request->client_list,
+                             &i915_file_priv->mm.request_list);
+       } else {
+               INIT_LIST_HEAD(&request->client_list);
+       }
 
        /* Associate any objects on the flushing list matching the write
         * domain we're flushing with our flush.
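
The per-client tracking added here hangs each request off two lists at once. A sketch of the bookkeeping it relies on (the real declarations live in i915_drv.h and may differ in detail):

	struct drm_i915_gem_request {
		uint32_t seqno;			/* GEM sequence number of the request */
		unsigned long emitted_jiffies;	/* when the request hit the ring */
		struct list_head list;		/* entry in dev_priv->mm.request_list */
		struct list_head client_list;	/* entry in the owning client's list */
	};

	struct drm_i915_file_private {
		struct {
			struct list_head request_list;	/* this client's requests */
		} mm;
	};

Requests emitted on behalf of the kernel itself pass file_priv == NULL; the INIT_LIST_HEAD in the else branch leaves their client_list self-linked, so the unconditional list_del(&request->client_list) added to i915_gem_retire_requests below is safe for them too.
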
@@ -1664,6 +1674,7 @@ i915_gem_retire_requests(struct drm_device *dev)
                        i915_gem_retire_request(dev, request);
 
                        list_del(&request->list);
+                       list_del(&request->client_list);
                        drm_free(request, sizeof(*request), DRM_MEM_DRIVER);
                } else
                        break;
@@ -1702,7 +1713,10 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
        BUG_ON(seqno == 0);
 
        if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
-               ier = I915_READ(IER);
+               if (IS_IGDNG(dev))
+                       ier = I915_READ(DEIER) | I915_READ(GTIER);
+               else
+                       ier = I915_READ(IER);
                if (!ier) {
                        DRM_ERROR("something (likely vbetool) disabled "
                                  "interrupts, re-enabling\n");
@@ -1754,8 +1768,7 @@ i915_gem_flush(struct drm_device *dev,
        if (flush_domains & I915_GEM_DOMAIN_CPU)
                drm_agp_chipset_flush(dev);
 
-       if ((invalidate_domains | flush_domains) & ~(I915_GEM_DOMAIN_CPU |
-                                                    I915_GEM_DOMAIN_GTT)) {
+       if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
                /*
                 * read/write caches:
                 *
@@ -1977,7 +1990,7 @@ i915_gem_evict_something(struct drm_device *dev)
                        i915_gem_flush(dev,
                                       obj->write_domain,
                                       obj->write_domain);
-                       i915_add_request(dev, obj->write_domain);
+                       i915_add_request(dev, NULL, obj->write_domain);
 
                        obj = NULL;
                        continue;
@@ -1991,7 +2004,7 @@ i915_gem_evict_something(struct drm_device *dev)
                /* If we didn't do any of the above, there's nothing to be done
                 * and we just can't fit it in.
                 */
-               return -ENOMEM;
+               return -ENOSPC;
        }
        return ret;
 }
@@ -2006,7 +2019,7 @@ i915_gem_evict_everything(struct drm_device *dev)
                if (ret != 0)
                        break;
        }
-       if (ret == -ENOMEM)
+       if (ret == -ENOSPC)
                return 0;
        return ret;
 }
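
The -ENOMEM to -ENOSPC conversions threaded through this patch separate "the GTT aperture is full" from "a kernel allocation failed", so callers can retry eviction only in the recoverable case. A hypothetical caller pattern (the pin_tries loop later in this patch does essentially this):

	ret = i915_gem_object_pin(obj, alignment);
	if (ret == -ENOSPC) {
		/* Aperture exhaustion is recoverable: evict and retry once. */
		ret = i915_gem_evict_something(dev);
		if (ret == 0)
			ret = i915_gem_object_pin(obj, alignment);
	}
	/* -ENOMEM now unambiguously means an allocation failure: give up. */
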
@@ -2149,13 +2162,11 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
        val |= I830_FENCE_REG_VALID;
 
        I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
-
 }
 
 /**
  * i915_gem_object_get_fence_reg - set up a fence reg for an object
  * @obj: object to map through a fence reg
- * @write: object is about to be written
  *
  * When mapping objects through the GTT, userspace wants to be able to write
  * to them without having to worry about swizzling if the object is tiled.
@@ -2166,8 +2177,8 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
  * It then sets up the reg based on the object's properties: address, pitch
  * and tiling format.
  */
-static int
-i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write)
+int
+i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
 {
        struct drm_device *dev = obj->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
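
To make "address, pitch and tiling format" concrete, this is roughly what the 965-class path earlier in this file packs into a fence register (quoted from memory as a sketch; the shift macros live in i915_reg.h):

	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
			 0xfffff000) << 32;		/* end of fenced range */
	val |= obj_priv->gtt_offset & 0xfffff000;	/* start of fenced range */
	val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
	if (obj_priv->tiling_mode == I915_TILING_Y)
		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
	val |= I965_FENCE_REG_VALID;

	I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);

Because the pitch is baked into the register, a stride change invalidates any live fence, which is what motivates the new put_fence_reg helper below.
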
@@ -2215,7 +2226,7 @@ try_again:
                loff_t offset;
 
                if (avail == 0)
-                       return -ENOMEM;
+                       return -ENOSPC;
 
                for (i = dev_priv->fence_reg_start;
                     i < dev_priv->num_fence_regs; i++) {
@@ -2248,7 +2259,7 @@ try_again:
                                i915_gem_flush(dev,
                                               I915_GEM_GPU_DOMAINS,
                                               I915_GEM_GPU_DOMAINS);
-                               seqno = i915_add_request(dev,
+                               seqno = i915_add_request(dev, NULL,
                                                         I915_GEM_GPU_DOMAINS);
                                if (seqno == 0)
                                        return -ENOMEM;
@@ -2260,9 +2271,6 @@ try_again:
                        goto try_again;
                }
 
-               BUG_ON(old_obj_priv->active ||
-                      (reg->obj->write_domain & I915_GEM_GPU_DOMAINS));
-
                /*
                 * Zap this virtual mapping so we can set up a fence again
                 * for this object next time we need it.
@@ -2319,6 +2327,42 @@ i915_gem_clear_fence_reg(struct drm_gem_object *obj)
        obj_priv->fence_reg = I915_FENCE_REG_NONE;
 }
 
+/**
+ * i915_gem_object_put_fence_reg - wait for outstanding fenced access to a
+ * buffer to finish, then reset its fence register
+ * @obj: tiled object holding a fence register.
+ *
+ * Zeroes out the fence register itself and clears out the associated
+ * data structures in dev_priv and obj_priv.
+ */
+int
+i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
+{
+       struct drm_device *dev = obj->dev;
+       struct drm_i915_gem_object *obj_priv = obj->driver_private;
+
+       if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
+               return 0;
+
+       /* On the i915, GPU access to tiled buffers is via a fence,
+        * therefore we must wait for any outstanding access to complete
+        * before clearing the fence.
+        */
+       if (!IS_I965G(dev)) {
+               int ret;
+
+               i915_gem_object_flush_gpu_write_domain(obj);
+               i915_gem_object_flush_gtt_write_domain(obj);
+               ret = i915_gem_object_wait_rendering(obj);
+               if (ret != 0)
+                       return ret;
+       }
+
+       i915_gem_clear_fence_reg(obj);
+
+       return 0;
+}
+
 /**
  * Finds free space in the GTT aperture and binds the object there.
  */
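
i915_gem_object_put_fence_reg() is the hook behind this commit's subject: it lets the tiling ioctl drop a stale fence when the stride changes. A hedged sketch of the intended call site in i915_gem_set_tiling() (i915_gem_tiling.c, not part of this file's diff; names and error handling are condensed):

	if (args->tiling_mode != obj_priv->tiling_mode ||
	    args->stride != obj_priv->stride) {
		/* The live fence encodes the old pitch, so retire it
		 * before committing the new parameters.
		 */
		ret = i915_gem_object_put_fence_reg(obj);
		if (ret != 0)
			goto err;	/* hypothetical error path */

		obj_priv->tiling_mode = args->tiling_mode;
		obj_priv->stride = args->stride;
	}
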
@@ -2367,7 +2411,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
                spin_unlock(&dev_priv->mm.active_list_lock);
                if (lists_empty) {
                        DRM_ERROR("GTT full, but LRU list empty\n");
-                       return -ENOMEM;
+                       return -ENOSPC;
                }
 
                ret = i915_gem_evict_something(dev);
@@ -2412,8 +2456,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
         * wasn't in the GTT, there shouldn't be any way it could have been in
         * a GPU cache
         */
-       BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
-       BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
+       BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+       BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
 
        return 0;
 }
@@ -2455,7 +2499,7 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
 
        /* Queue the GPU write cache flushing we need. */
        i915_gem_flush(dev, 0, obj->write_domain);
-       seqno = i915_add_request(dev, obj->write_domain);
+       seqno = i915_add_request(dev, NULL, obj->write_domain);
        obj->write_domain = 0;
        i915_gem_object_move_to_active(obj, seqno);
 }
@@ -3038,20 +3082,12 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
        drm_i915_private_t *dev_priv = dev->dev_private;
        int nbox = exec->num_cliprects;
        int i = 0, count;
-       uint32_t        exec_start, exec_len;
+       uint32_t exec_start, exec_len;
        RING_LOCALS;
 
        exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
        exec_len = (uint32_t) exec->batch_len;
 
-       if ((exec_start | exec_len) & 0x7) {
-               DRM_ERROR("alignment\n");
-               return -EINVAL;
-       }
-
-       if (!exec_start)
-               return -EINVAL;
-
        count = nbox ? nbox : 1;
 
        for (i = 0; i < count; i++) {
@@ -3092,6 +3128,10 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
 /* Throttle our rendering by waiting until the ring has completed our requests
  * emitted over 20 msec ago.
  *
+ * Note that if we were to sample the current jiffies on each pass of the
+ * loop, we would never leave the function while any frames remained
+ * outstanding whenever rendering a frame took longer than 20ms.
+ *
  * This should get us reasonable parallelism between CPU and GPU but also
  * relatively low latency when blocking on a particular request to finish.
  */
@@ -3100,15 +3140,25 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
 {
        struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
        int ret = 0;
-       uint32_t seqno;
+       unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
 
        mutex_lock(&dev->struct_mutex);
-       seqno = i915_file_priv->mm.last_gem_throttle_seqno;
-       i915_file_priv->mm.last_gem_throttle_seqno =
-               i915_file_priv->mm.last_gem_seqno;
-       if (seqno)
-               ret = i915_wait_request(dev, seqno);
+       while (!list_empty(&i915_file_priv->mm.request_list)) {
+               struct drm_i915_gem_request *request;
+
+               request = list_first_entry(&i915_file_priv->mm.request_list,
+                                          struct drm_i915_gem_request,
+                                          client_list);
+
+               if (time_after_eq(request->emitted_jiffies, recent_enough))
+                       break;
+
+               ret = i915_wait_request(dev, request->seqno);
+               if (ret != 0)
+                       break;
+       }
        mutex_unlock(&dev->struct_mutex);
+
        return ret;
 }
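
The loop's exit test leans on time_after_eq() from <linux/jiffies.h>, which is wraparound-safe; roughly (a paraphrase of the kernel macro, minus its typechecks):

	#define time_after_eq(a, b)	((long)((a) - (b)) >= 0)

So the walk stops at the first request emitted within the last 20ms, and because recent_enough is snapshotted once before the loop, requests emitted while we sleep in i915_wait_request() can never keep extending the wait.
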
 
@@ -3185,12 +3235,29 @@ err:
        return ret;
 }
 
+static int
+i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer *exec,
+                          uint64_t exec_offset)
+{
+       uint32_t exec_start, exec_len;
+
+       exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
+       exec_len = (uint32_t) exec->batch_len;
+
+       if ((exec_start | exec_len) & 0x7)
+               return -EINVAL;
+
+       if (!exec_start)
+               return -EINVAL;
+
+       return 0;
+}
+
 int
 i915_gem_execbuffer(struct drm_device *dev, void *data,
                    struct drm_file *file_priv)
 {
        drm_i915_private_t *dev_priv = dev->dev_private;
-       struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
        struct drm_i915_gem_execbuffer *args = data;
        struct drm_i915_gem_exec_object *exec_list = NULL;
        struct drm_gem_object **object_list = NULL;
@@ -3315,7 +3382,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
                        break;
 
                /* error other than GTT full, or we've already tried again */
-               if (ret != -ENOMEM || pin_tries >= 1) {
+               if (ret != -ENOSPC || pin_tries >= 1) {
                        if (ret != -ERESTARTSYS)
                                DRM_ERROR("Failed to pin buffers %d\n", ret);
                        goto err;
@@ -3334,8 +3401,20 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
        /* Set the pending read domains for the batch buffer to COMMAND */
        batch_obj = object_list[args->buffer_count-1];
-       batch_obj->pending_read_domains = I915_GEM_DOMAIN_COMMAND;
-       batch_obj->pending_write_domain = 0;
+       if (batch_obj->pending_write_domain) {
+               DRM_ERROR("Attempting to use self-modifying batch buffer\n");
+               ret = -EINVAL;
+               goto err;
+       }
+       batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
+
+       /* Sanity check the batch buffer, prior to moving objects */
+       exec_offset = exec_list[args->buffer_count - 1].offset;
+       ret = i915_gem_check_execbuffer(args, exec_offset);
+       if (ret != 0) {
+               DRM_ERROR("execbuf with invalid offset/length\n");
+               goto err;
+       }
 
        i915_verify_inactive(dev, __FILE__, __LINE__);
 
@@ -3366,7 +3445,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
                               dev->invalidate_domains,
                               dev->flush_domains);
                if (dev->flush_domains)
-                       (void)i915_add_request(dev, dev->flush_domains);
+                       (void)i915_add_request(dev, file_priv,
+                                              dev->flush_domains);
        }
 
        for (i = 0; i < args->buffer_count; i++) {
@@ -3384,8 +3464,6 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
        }
 #endif
 
-       exec_offset = exec_list[args->buffer_count - 1].offset;
-
 #if WATCH_EXEC
        i915_gem_dump_object(batch_obj,
                              args->batch_len,
@@ -3415,9 +3493,8 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
         * *some* interrupts representing completion of buffers that we can
         * wait on when trying to clear up gtt space).
         */
-       seqno = i915_add_request(dev, flush_domains);
+       seqno = i915_add_request(dev, file_priv, flush_domains);
        BUG_ON(seqno == 0);
-       i915_file_priv->mm.last_gem_seqno = seqno;
        for (i = 0; i < args->buffer_count; i++) {
                struct drm_gem_object *obj = object_list[i];
 
@@ -3506,7 +3583,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
        if (!IS_I965G(dev) &&
            obj_priv->fence_reg == I915_FENCE_REG_NONE &&
            obj_priv->tiling_mode != I915_TILING_NONE) {
-               ret = i915_gem_object_get_fence_reg(obj, true);
+               ret = i915_gem_object_get_fence_reg(obj);
                if (ret != 0) {
                        if (ret != -EBUSY && ret != -ERESTARTSYS)
                                DRM_ERROR("Failure to install fence: %d\n",
@@ -3523,8 +3600,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
                atomic_inc(&dev->pin_count);
                atomic_add(obj->size, &dev->pin_memory);
                if (!obj_priv->active &&
-                   (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
-                                          I915_GEM_DOMAIN_GTT)) == 0 &&
+                   (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 &&
                    !list_empty(&obj_priv->list))
                        list_del_init(&obj_priv->list);
        }
@@ -3551,8 +3627,7 @@ i915_gem_object_unpin(struct drm_gem_object *obj)
         */
        if (obj_priv->pin_count == 0) {
                if (!obj_priv->active &&
-                   (obj->write_domain & ~(I915_GEM_DOMAIN_CPU |
-                                          I915_GEM_DOMAIN_GTT)) == 0)
+                   (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
                        list_move_tail(&obj_priv->list,
                                       &dev_priv->mm.inactive_list);
                atomic_dec(&dev->pin_count);
@@ -3656,15 +3731,14 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
        struct drm_gem_object *obj;
        struct drm_i915_gem_object *obj_priv;
 
-       mutex_lock(&dev->struct_mutex);
        obj = drm_gem_object_lookup(dev, file_priv, args->handle);
        if (obj == NULL) {
                DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
                          args->handle);
-               mutex_unlock(&dev->struct_mutex);
                return -EBADF;
        }
 
+       mutex_lock(&dev->struct_mutex);
        /* Update the active list for the hardware's current position.
         * Otherwise this only updates on a delayed timer or when irqs are
         * actually unmasked, and our working set ends up being larger than
@@ -3803,9 +3877,8 @@ i915_gem_idle(struct drm_device *dev)
 
        /* Flush the GPU along with all non-CPU write domains
         */
-       i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
-                      ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
-       seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU);
+       i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+       seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
 
        if (seqno == 0) {
                mutex_unlock(&dev->struct_mutex);
@@ -4272,6 +4345,8 @@ void i915_gem_detach_phys_object(struct drm_device *dev,
        }
        drm_clflush_pages(obj_priv->pages, page_count);
        drm_agp_chipset_flush(dev);
+
+       i915_gem_object_put_pages(obj);
 out:
        obj_priv->phys_obj->cur_obj = NULL;
        obj_priv->phys_obj = NULL;
@@ -4329,6 +4404,8 @@ i915_gem_attach_phys_object(struct drm_device *dev,
                kunmap_atomic(src, KM_USER0);
        }
 
+       i915_gem_object_put_pages(obj);
+
        return 0;
 out:
        return ret;
@@ -4355,3 +4432,17 @@ i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
        drm_agp_chipset_flush(dev);
        return 0;
 }
+
+void i915_gem_release(struct drm_device *dev, struct drm_file *file_priv)
+{
+       struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
+
+       /* Clean up our request list when the client is going away, so that
+        * later retire_requests won't dereference our soon-to-be-gone
+        * file_priv.
+        */
+       mutex_lock(&dev->struct_mutex);
+       while (!list_empty(&i915_file_priv->mm.request_list))
+               list_del_init(i915_file_priv->mm.request_list.next);
+       mutex_unlock(&dev->struct_mutex);
+}
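
Using list_del_init() rather than plain list_del() here matters: it leaves each orphaned request's client_list self-linked, so the unconditional list_del(&request->client_list) in i915_gem_retire_requests() remains valid after the file is gone. The function itself still has to be wired into the driver's file-close path; a hedged sketch of that hookup (the real change lives in i915_dma.c, outside this diff):

	void i915_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
	{
		struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;

		i915_gem_release(dev, file_priv);

		drm_free(i915_file_priv, sizeof(*i915_file_priv), DRM_MEM_FILES);
	}
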