diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 51e8fe5f1813..6eaf79504b58 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -790,6 +790,14 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto out_freewq; } + dev_priv->gpu_error.hangcheck_wq = + alloc_ordered_workqueue("i915-hangcheck", 0); + if (dev_priv->gpu_error.hangcheck_wq == NULL) { + DRM_ERROR("Failed to create our hangcheck workqueue.\n"); + ret = -ENOMEM; + goto out_freedpwq; + } + intel_irq_init(dev_priv); intel_uncore_sanitize(dev); @@ -864,6 +872,8 @@ out_gem_unload: intel_teardown_gmbus(dev); intel_teardown_mchbar(dev); pm_qos_remove_request(&dev_priv->pm_qos); + destroy_workqueue(dev_priv->gpu_error.hangcheck_wq); +out_freedpwq: destroy_workqueue(dev_priv->dp_wq); out_freewq: destroy_workqueue(dev_priv->wq); @@ -934,7 +944,7 @@ int i915_driver_unload(struct drm_device *dev) } /* Free error state after interrupts are fully disabled. */ - del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); + cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); cancel_work_sync(&dev_priv->gpu_error.work); i915_destroy_error_state(dev); @@ -960,6 +970,7 @@ int i915_driver_unload(struct drm_device *dev) destroy_workqueue(dev_priv->dp_wq); destroy_workqueue(dev_priv->wq); + destroy_workqueue(dev_priv->gpu_error.hangcheck_wq); pm_qos_remove_request(&dev_priv->pm_qos); i915_global_gtt_cleanup(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9da4e60bdc7a..5f50e7033ef7 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1402,7 +1402,7 @@ static int intel_runtime_suspend(struct device *device) return ret; } - del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); + cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); intel_uncore_forcewake_reset(dev, false); dev_priv->pm.suspended = true; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 760d239a73f3..7aee7d5d90e2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1345,7 +1345,8 @@ struct i915_gpu_error { /* Hang gpu twice in this window and your context gets banned */ #define DRM_I915_CTX_BAN_PERIOD DIV_ROUND_UP(8*DRM_I915_HANGCHECK_PERIOD, 1000) - struct timer_list hangcheck_timer; + struct workqueue_struct *hangcheck_wq; + struct delayed_work hangcheck_work; /* For reset and error_state handling. */ spinlock_t lock; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9c7c95a8eae4..361d18b5223a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4608,7 +4608,7 @@ i915_gem_suspend(struct drm_device *dev) i915_gem_stop_ringbuffers(dev); mutex_unlock(&dev->struct_mutex); - del_timer_sync(&dev_priv->gpu_error.hangcheck_timer); + cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); cancel_delayed_work_sync(&dev_priv->mm.retire_work); flush_delayed_work(&dev_priv->mm.idle_work); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 2399eaed2ca3..23bfe2232b6a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2974,7 +2974,7 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd) return HANGCHECK_HUNG; } -/** +/* * This is called when the chip hasn't reported back with completed * batchbuffers in a long time. We keep track per ring seqno progress and * if there are no progress, hangcheck score for that ring is increased. @@ -2982,10 +2982,12 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd) * we kick the ring. If we see no progress on three subsequent calls * we assume chip is wedged and try to fix it by resetting the chip. */ -static void i915_hangcheck_elapsed(unsigned long data) +static void i915_hangcheck_elapsed(struct work_struct *work) { - struct drm_device *dev = (struct drm_device *)data; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), + gpu_error.hangcheck_work.work); + struct drm_device *dev = dev_priv->dev; struct intel_engine_cs *ring; int i; int busy_count = 0, rings_hung = 0; @@ -3099,17 +3101,18 @@ static void i915_hangcheck_elapsed(unsigned long data) void i915_queue_hangcheck(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; - struct timer_list *timer = &dev_priv->gpu_error.hangcheck_timer; + struct i915_gpu_error *e = &to_i915(dev)->gpu_error; if (!i915.enable_hangcheck) return; - /* Don't continually defer the hangcheck, but make sure it is active */ - if (timer_pending(timer)) - return; - mod_timer(timer, - round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES)); + /* Don't continually defer the hangcheck so that it is always run at + * least once after work has been scheduled on any ring. Otherwise, + * we will ignore a hung ring if a second ring is kept busy. + */ + + queue_delayed_work(e->hangcheck_wq, &e->hangcheck_work, + round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES)); } static void ibx_irq_reset(struct drm_device *dev) @@ -4353,9 +4356,8 @@ void intel_irq_init(struct drm_i915_private *dev_priv) else dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; - setup_timer(&dev_priv->gpu_error.hangcheck_timer, - i915_hangcheck_elapsed, - (unsigned long) dev); + INIT_DELAYED_WORK(&dev_priv->gpu_error.hangcheck_work, + i915_hangcheck_elapsed); INIT_DELAYED_WORK(&dev_priv->hotplug_reenable_work, intel_hpd_irq_reenable_work);