diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 329fb3649dc3..48572b157222 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1954,12 +1954,6 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_putc(m, '\n'); } - seq_printf(m, - "\tvma hashtable size=%u (actual %lu), count=%u\n", - ctx->vma_lut.ht_size, - BIT(ctx->vma_lut.ht_bits), - ctx->vma_lut.ht_count); - seq_putc(m, '\n'); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a4ce8fb25fdb..5b70bb9089fe 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2156,6 +2156,7 @@ struct drm_i915_private { struct kmem_cache *objects; struct kmem_cache *vmas; + struct kmem_cache *luts; struct kmem_cache *requests; struct kmem_cache *dependencies; struct kmem_cache *priorities; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5a3f3bb3f21d..b9e8e0d6e97b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3242,25 +3242,33 @@ out_rearm: void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { + struct drm_i915_private *i915 = to_i915(gem->dev); struct drm_i915_gem_object *obj = to_intel_bo(gem); struct drm_i915_file_private *fpriv = file->driver_priv; - struct i915_vma *vma, *vn; + struct i915_lut_handle *lut, *ln; - mutex_lock(&obj->base.dev->struct_mutex); - list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) - if (vma->vm->file == fpriv) + mutex_lock(&i915->drm.struct_mutex); + + list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) { + struct i915_gem_context *ctx = lut->ctx; + struct i915_vma *vma; + + if (ctx->file_priv != fpriv) + continue; + + vma = radix_tree_delete(&ctx->handles_vma, lut->handle); + + if (!i915_vma_is_ggtt(vma)) i915_vma_close(vma); - vma = obj->vma_hashed; - if (vma && vma->ctx->file_priv == fpriv) - i915_vma_unlink_ctx(vma); + list_del(&lut->obj_link); + list_del(&lut->ctx_link); - if (i915_gem_object_is_active(obj) && - !i915_gem_object_has_active_reference(obj)) { - i915_gem_object_set_active_reference(obj); - i915_gem_object_get(obj); + kmem_cache_free(i915->luts, lut); + __i915_gem_object_release_unless_active(obj); } - mutex_unlock(&obj->base.dev->struct_mutex); + + mutex_unlock(&i915->drm.struct_mutex); } static unsigned long to_wait_timeout(s64 timeout_ns) @@ -4252,6 +4260,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&obj->global_link); INIT_LIST_HEAD(&obj->userfault_link); INIT_LIST_HEAD(&obj->vma_list); + INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); obj->ops = ops; @@ -4495,8 +4504,8 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) { lockdep_assert_held(&obj->base.dev->struct_mutex); - GEM_BUG_ON(i915_gem_object_has_active_reference(obj)); - if (i915_gem_object_is_active(obj)) + if (!i915_gem_object_has_active_reference(obj) && + i915_gem_object_is_active(obj)) i915_gem_object_set_active_reference(obj); else i915_gem_object_put(obj); @@ -4888,12 +4897,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) if (!dev_priv->vmas) goto err_objects; + dev_priv->luts = KMEM_CACHE(i915_lut_handle, 0); + if (!dev_priv->luts) + goto err_vmas; + dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_TYPESAFE_BY_RCU); if (!dev_priv->requests) - goto err_vmas; + goto err_luts; dev_priv->dependencies = KMEM_CACHE(i915_dependency, SLAB_HWCACHE_ALIGN | @@ -4937,6 +4950,8 @@ err_dependencies: kmem_cache_destroy(dev_priv->dependencies); err_requests: kmem_cache_destroy(dev_priv->requests); +err_luts: + kmem_cache_destroy(dev_priv->luts); err_vmas: kmem_cache_destroy(dev_priv->vmas); err_objects: @@ -4959,6 +4974,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); kmem_cache_destroy(dev_priv->requests); + kmem_cache_destroy(dev_priv->luts); kmem_cache_destroy(dev_priv->vmas); kmem_cache_destroy(dev_priv->objects); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 86ac74a8a5b2..58a2a44f88bd 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -93,69 +93,28 @@ #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -/* Initial size (as log2) to preallocate the handle->object hashtable */ -#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */ - -static void resize_vma_ht(struct work_struct *work) +static void lut_close(struct i915_gem_context *ctx) { - struct i915_gem_context_vma_lut *lut = - container_of(work, typeof(*lut), resize); - unsigned int bits, new_bits, size, i; - struct hlist_head *new_ht; + struct i915_lut_handle *lut, *ln; + struct radix_tree_iter iter; + void __rcu **slot; - GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)); - - bits = 1 + ilog2(4*lut->ht_count/3 + 1); - new_bits = min_t(unsigned int, - max(bits, VMA_HT_BITS), - sizeof(unsigned int) * BITS_PER_BYTE - 1); - if (new_bits == lut->ht_bits) - goto out; - - new_ht = kzalloc(sizeof(*new_ht)<ht_bits); - for (i = 0; i < size; i++) { - struct i915_vma *vma; - struct hlist_node *tmp; - - hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node) - hlist_add_head(&vma->ctx_node, - &new_ht[hash_32(vma->ctx_handle, - new_bits)]); + list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) { + list_del(&lut->obj_link); + kmem_cache_free(ctx->i915->luts, lut); } - kvfree(lut->ht); - lut->ht = new_ht; - lut->ht_bits = new_bits; -out: - smp_store_release(&lut->ht_size, BIT(bits)); - GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS); -} -static void vma_lut_free(struct i915_gem_context *ctx) -{ - struct i915_gem_context_vma_lut *lut = &ctx->vma_lut; - unsigned int i, size; + radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) { + struct i915_vma *vma = rcu_dereference_raw(*slot); + struct drm_i915_gem_object *obj = vma->obj; - if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) - cancel_work_sync(&lut->resize); + radix_tree_iter_delete(&ctx->handles_vma, &iter, slot); - size = BIT(lut->ht_bits); - for (i = 0; i < size; i++) { - struct i915_vma *vma; + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); - hlist_for_each_entry(vma, &lut->ht[i], ctx_node) { - vma->obj->vma_hashed = NULL; - vma->ctx = NULL; - i915_vma_put(vma); - } + __i915_gem_object_release_unless_active(obj); } - kvfree(lut->ht); } static void i915_gem_context_free(struct i915_gem_context *ctx) @@ -165,7 +124,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); - vma_lut_free(ctx); i915_ppgtt_put(ctx->ppgtt); for (i = 0; i < I915_NUM_ENGINES; i++) { @@ -239,8 +197,11 @@ void i915_gem_context_release(struct kref *ref) static void context_close(struct i915_gem_context *ctx) { i915_gem_context_set_closed(ctx); + + lut_close(ctx); if (ctx->ppgtt) i915_ppgtt_close(&ctx->ppgtt->base); + ctx->file_priv = ERR_PTR(-EBADF); i915_gem_context_put(ctx); } @@ -313,16 +274,8 @@ __create_hw_context(struct drm_i915_private *dev_priv, ctx->i915 = dev_priv; ctx->priority = I915_PRIORITY_NORMAL; - ctx->vma_lut.ht_bits = VMA_HT_BITS; - ctx->vma_lut.ht_size = BIT(VMA_HT_BITS); - BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS); - ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size, - sizeof(*ctx->vma_lut.ht), - GFP_KERNEL); - if (!ctx->vma_lut.ht) - goto err_out; - - INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht); + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + INIT_LIST_HEAD(&ctx->handles_list); /* Default context will never have a file_priv */ ret = DEFAULT_CONTEXT_HANDLE; @@ -372,8 +325,6 @@ err_pid: put_pid(ctx->pid); idr_remove(&file_priv->context_idr, ctx->user_handle); err_lut: - kvfree(ctx->vma_lut.ht); -err_out: context_close(ctx); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 2d02918a449e..44688e22a5c2 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -27,6 +27,7 @@ #include #include +#include struct pid; @@ -149,32 +150,6 @@ struct i915_gem_context { /** ggtt_offset_bias: placement restriction for context objects */ u32 ggtt_offset_bias; - struct i915_gem_context_vma_lut { - /** ht_size: last request size to allocate the hashtable for. */ - unsigned int ht_size; -#define I915_CTX_RESIZE_IN_PROGRESS BIT(0) - /** ht_bits: real log2(size) of hashtable. */ - unsigned int ht_bits; - /** ht_count: current number of entries inside the hashtable */ - unsigned int ht_count; - - /** ht: the array of buckets comprising the simple hashtable */ - struct hlist_head *ht; - - /** - * resize: After an execbuf completes, we check the load factor - * of the hashtable. If the hashtable is too full, or too empty, - * we schedule a task to resize the hashtable. During the - * resize, the entries are moved between different buckets and - * so we cannot simultaneously read the hashtable as it is - * being resized (unlike rhashtable). Therefore we treat the - * active work as a strong barrier, pausing a subsequent - * execbuf to wait for the resize worker to complete, if - * required. - */ - struct work_struct resize; - } vma_lut; - /** engine: per-engine logical HW state */ struct intel_context { struct i915_vma *state; @@ -205,6 +180,18 @@ struct i915_gem_context { /** remap_slice: Bitmask of cache lines that need remapping */ u8 remap_slice; + + /** handles_vma: rbtree to look up our context specific obj/vma for + * the user handle. (user handles are per fd, but the binding is + * per vm, which may be one per context or shared with the global GTT) + */ + struct radix_tree_root handles_vma; + + /** handles_list: reverse list of all the rbtree entries in use for + * this context, which allows us to free all the allocations on + * context close. + */ + struct list_head handles_list; }; static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 95e461259d24..9f1057c4cf1c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -450,9 +450,7 @@ eb_validate_vma(struct i915_execbuffer *eb, } static int -eb_add_vma(struct i915_execbuffer *eb, - unsigned int i, struct i915_vma *vma, - unsigned int flags) +eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; int err; @@ -482,7 +480,7 @@ eb_add_vma(struct i915_execbuffer *eb, * to find the right target VMA when doing relocations. */ eb->vma[i] = vma; - eb->flags[i] = entry->flags | flags; + eb->flags[i] = entry->flags; vma->exec_flags = &eb->flags[i]; err = 0; @@ -647,19 +645,6 @@ static int eb_reserve(struct i915_execbuffer *eb) } while (1); } -static inline struct hlist_head * -ht_head(const struct i915_gem_context_vma_lut *lut, u32 handle) -{ - return &lut->ht[hash_32(handle, lut->ht_bits)]; -} - -static inline bool -ht_needs_resize(const struct i915_gem_context_vma_lut *lut) -{ - return (4*lut->ht_count > 3*lut->ht_size || - 4*lut->ht_count + 1 < lut->ht_size); -} - static unsigned int eb_batch_index(const struct i915_execbuffer *eb) { if (eb->args->flags & I915_EXEC_BATCH_FIRST) @@ -688,7 +673,7 @@ static int eb_select_context(struct i915_execbuffer *eb) static int eb_lookup_vmas(struct i915_execbuffer *eb) { - struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut; + struct radix_tree_root *handles_vma = &eb->ctx->handles_vma; struct drm_i915_gem_object *uninitialized_var(obj); unsigned int i; int err; @@ -702,24 +687,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) INIT_LIST_HEAD(&eb->relocs); INIT_LIST_HEAD(&eb->unbound); - if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)) - flush_work(&lut->resize); - GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS); - for (i = 0; i < eb->buffer_count; i++) { u32 handle = eb->exec[i].handle; - struct hlist_head *hl = ht_head(lut, handle); - unsigned int flags = 0; + struct i915_lut_handle *lut; struct i915_vma *vma; - hlist_for_each_entry(vma, hl, ctx_node) { - GEM_BUG_ON(vma->ctx != eb->ctx); - - if (vma->ctx_handle != handle) - continue; - + vma = radix_tree_lookup(handles_vma, handle); + if (likely(vma)) goto add_vma; - } obj = i915_gem_object_lookup(eb->file, handle); if (unlikely(!obj)) { @@ -733,26 +708,28 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) goto err_obj; } - /* First come, first served */ - if (!vma->ctx) { - vma->ctx = eb->ctx; - vma->ctx_handle = handle; - hlist_add_head(&vma->ctx_node, hl); - lut->ht_count++; - lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS; - if (i915_vma_is_ggtt(vma)) { - GEM_BUG_ON(obj->vma_hashed); - obj->vma_hashed = vma; - } - - /* transfer ref to ctx */ - obj = NULL; - } else { - flags = __EXEC_OBJECT_HAS_REF; + lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL); + if (unlikely(!lut)) { + err = -ENOMEM; + goto err_obj; } + err = radix_tree_insert(handles_vma, handle, vma); + if (unlikely(err)) { + kfree(lut); + goto err_obj; + } + + list_add(&lut->obj_link, &obj->lut_list); + list_add(&lut->ctx_link, &eb->ctx->handles_list); + lut->ctx = eb->ctx; + lut->handle = handle; + + /* transfer ref to ctx */ + obj = NULL; + add_vma: - err = eb_add_vma(eb, i, vma, flags); + err = eb_add_vma(eb, i, vma); if (unlikely(err)) goto err_obj; @@ -760,13 +737,6 @@ add_vma: GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); } - if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) { - if (ht_needs_resize(lut)) - queue_work(system_highpri_wq, &lut->resize); - else - lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS; - } - /* take note of the batch buffer before we might reorder the lists */ i = eb_batch_index(eb); eb->batch = eb->vma[i]; @@ -794,7 +764,6 @@ err_obj: i915_gem_object_put(obj); err_vma: eb->vma[i] = NULL; - lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS; return err; } diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 3baa341432db..c30d8f808185 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -38,6 +38,19 @@ struct drm_i915_gem_object; +/* + * struct i915_lut_handle tracks the fast lookups from handle to vma used + * for execbuf. Although we use a radixtree for that mapping, in order to + * remove them as the object or context is closed, we need a secondary list + * and a translation entry (i915_lut_handle). + */ +struct i915_lut_handle { + struct list_head obj_link; + struct list_head ctx_link; + struct i915_gem_context *ctx; + u32 handle; +}; + struct drm_i915_gem_object_ops { unsigned int flags; #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0) @@ -89,7 +102,15 @@ struct drm_i915_gem_object { * They are also added to @vma_list for easy iteration. */ struct rb_root vma_tree; - struct i915_vma *vma_hashed; + + /** + * @lut_list: List of vma lookup entries in use for this object. + * + * If this object is closed, we need to remove all of its VMA from + * the fast lookup index in associated contexts; @lut_list provides + * this translation from object to context->handles_vma. + */ + struct list_head lut_list; /** Stolen memory for this object, instead of being backed by shmem. */ struct drm_mm_node *stolen; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 958be0a95960..02d1a5eacb00 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -597,33 +597,11 @@ static void i915_vma_destroy(struct i915_vma *vma) kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); } -void i915_vma_unlink_ctx(struct i915_vma *vma) -{ - struct i915_gem_context *ctx = vma->ctx; - - if (ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS) { - cancel_work_sync(&ctx->vma_lut.resize); - ctx->vma_lut.ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS; - } - - __hlist_del(&vma->ctx_node); - ctx->vma_lut.ht_count--; - - if (i915_vma_is_ggtt(vma)) - vma->obj->vma_hashed = NULL; - vma->ctx = NULL; - - i915_vma_put(vma); -} - void i915_vma_close(struct i915_vma *vma) { GEM_BUG_ON(i915_vma_is_closed(vma)); vma->flags |= I915_VMA_CLOSED; - if (vma->ctx) - i915_vma_unlink_ctx(vma); - list_del(&vma->obj_link); rb_erase(&vma->obj_node, &vma->obj->vma_tree); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 5c49506d14bc..1fd61e88cfd0 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -115,10 +115,6 @@ struct i915_vma { unsigned int *exec_flags; struct hlist_node exec_node; u32 exec_handle; - - struct i915_gem_context *ctx; - struct hlist_node ctx_node; - u32 ctx_handle; }; struct i915_vma * diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index d436f2d5089b..098ce643ad07 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -40,18 +40,13 @@ mock_context(struct drm_i915_private *i915, INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; - ctx->vma_lut.ht_bits = VMA_HT_BITS; - ctx->vma_lut.ht_size = BIT(VMA_HT_BITS); - ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size, - sizeof(*ctx->vma_lut.ht), - GFP_KERNEL); - if (!ctx->vma_lut.ht) - goto err_free; + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); + INIT_LIST_HEAD(&ctx->handles_list); ret = ida_simple_get(&i915->contexts.hw_ida, 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); if (ret < 0) - goto err_vma_ht; + goto err_handles; ctx->hw_id = ret; if (name) { @@ -66,9 +61,7 @@ mock_context(struct drm_i915_private *i915, return ctx; -err_vma_ht: - kvfree(ctx->vma_lut.ht); -err_free: +err_handles: kfree(ctx); return NULL; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 898e87998417..3527eb364964 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2022,6 +2022,7 @@ void radix_tree_iter_delete(struct radix_tree_root *root, if (__radix_tree_delete(root, iter->node, slot)) iter->index = iter->next_index; } +EXPORT_SYMBOL(radix_tree_iter_delete); /** * radix_tree_delete_item - delete an item from a radix tree