From e314ba42cb4dccd4d9edb2ffcb2295b4c4e2d00d Mon Sep 17 00:00:00 2001
From: Yannick Cote
Date: Tue, 1 Mar 2022 19:54:52 -0500
Subject: [KPATCH CVE-2022-0330] drm/i915: kpatch fixes for CVE-2022-0330

Kernels:
4.18.0-348.el8
4.18.0-348.2.1.el8_5
4.18.0-348.7.1.el8_5
4.18.0-348.12.2.el8_5

Changes since last build:
arches: x86_64
i915_drv.o: changed function: i915_driver_release
i915_vma.o: changed function: i915_vma_bind
intel_gt.o: new function: intel_gt_invalidate_tlbs
intel_gt.o: new function: tlb_invalidate_lock_ctor
intel_uncore.o: changed function: __intel_uncore_forcewake_put
intel_uncore.o: changed function: __intel_wait_for_register
intel_uncore.o: changed function: i915_pmic_bus_access_notifier
intel_uncore.o: changed function: intel_uncore_forcewake_put
intel_uncore.o: changed function: intel_uncore_forcewake_put__locked
intel_uncore.o: changed function: intel_uncore_forcewake_user_put
intel_uncore.o: new function: intel_uncore_forcewake_put_delayed
---------------------------

Kpatch-MR: https://gitlab.com/redhat/prdsc/rhel/src/kpatch/rhel-8/-/merge_requests/33
Approved-by: Joe Lawrence (@joe.lawrence)

Kernels:
4.18.0-348.el8
4.18.0-348.2.1.el8_5
4.18.0-348.7.1.el8_5
4.18.0-348.12.2.el8_5

Modifications:
- Move the new bit definition to .c files, avoiding changes to .h files.
- Redefine tlb_invalidate_lock as a klp shadow variable and avoid
  changes to the global structure definition (struct intel_gt).

commit 01dfa79afb751b4fec242c7d05ee2e0f78fe9a78
Author: Patrick Talbert
Date:   Mon Jan 31 10:33:24 2022 +0100

    drm/i915: Flush TLBs before releasing backing store

    Bugzilla: https://bugzilla.redhat.com/2044328
    CVE: CVE-2022-0330
    Y-Commit: 5dfb7de610e0b38a03d4d71bdc6cb23a8af0161d

    commit 7938d61591d33394a21bdd7797a245b65428f44c
    Author: Tvrtko Ursulin
    Date:   Tue Oct 19 13:27:10 2021 +0100

        drm/i915: Flush TLBs before releasing backing store

        We need to flush TLBs before releasing the backing store, otherwise
        userspace is able to encounter stale entries if a) it is not
        declaring GPU access to certain buffers and b) this GPU execution
        then races with the backing store release getting triggered
        asynchronously.

        The approach taken is to mark any buffer object which was ever
        bound to the GPU and to trigger a serialized TLB flush when its
        backing store is released.

        Alternatively, the flushing could be done on VMA unbind, at which
        point we would be able to ascertain whether there is potential
        parallel GPU execution (which could race). The choice essentially
        boils down to paying the cost of TLB flushes perhaps needlessly at
        VMA unbind time (when the backing store is not known to be
        definitely going away, so flushing is not always required for
        safety), versus potentially needlessly at backing store release
        time (since at that point we cannot tell whether parallel GPU
        execution is happening).

        Therefore simplicity of implementation has been chosen for now,
        with scope to benchmark and refine later as required.
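In outline, the change amounts to the following (a minimal illustrative
sketch only; mark_object_bound() and release_object_pages() are invented
names standing in for the real call sites shown in the hunks below):

    /* Sketch of the mark-and-flush idea; not the literal driver code.
     * Assumes the i915 driver context, i.e. "i915_drv.h" is available. */

    /* At GPU bind time: remember that the object was ever GPU-visible. */
    static void mark_object_bound(struct drm_i915_gem_object *obj)
    {
            set_bit(I915_BO_WAS_BOUND_BIT, &obj->flags);
    }

    /* At backing store release time: flush TLBs once if the object was
     * ever bound.  The actual patch additionally takes a runtime PM
     * wakeref and serializes the flush behind a mutex. */
    static void release_object_pages(struct drm_i915_gem_object *obj)
    {
            struct drm_i915_private *i915 = to_i915(obj->base.dev);

            if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags))
                    intel_gt_invalidate_tlbs(&i915->gt);
    }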
        Signed-off-by: Tvrtko Ursulin
        Reported-by: Sushma Venkatesh Reddy
        Cc: Daniel Vetter
        Cc: Jon Bloomfield
        Cc: Joonas Lahtinen
        Cc: Jani Nikula
        Cc: stable@vger.kernel.org

    Signed-off-by: Patrick Talbert

Signed-off-by: Yannick Cote
---
 drivers/gpu/drm/i915/gem/i915_gem_pages.c |  13 +++
 drivers/gpu/drm/i915/gt/intel_gt.c        | 130 ++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.c           |   5 +
 drivers/gpu/drm/i915/i915_vma.c           |   6 +
 drivers/gpu/drm/i915/intel_uncore.c       |  26 ++++-
 5 files changed, 176 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 76574e245916..ba7fce675ee7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -173,6 +173,11 @@ static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
 		vunmap(ptr);
 }
 
+/* CVE-2022-0330 - kpatch gathered definitions */
+#define I915_BO_WAS_BOUND_BIT 4
+
+void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+
 struct sg_table *
 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 {
@@ -195,6 +200,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 	__i915_gem_object_reset_page_iter(obj);
 	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
 
+	if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
+		struct drm_i915_private *i915 = to_i915(obj->base.dev);
+		intel_wakeref_t wakeref;
+
+		with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
+			intel_gt_invalidate_tlbs(&i915->gt);
+	}
+
 	return pages;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index d8e1ab412634..da0b144ea418 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -662,3 +662,133 @@ void intel_gt_info_print(const struct intel_gt_info *info,
 
 	intel_sseu_dump(&info->sseu, p);
 }
+
+struct reg_and_bit {
+	i915_reg_t reg;
+	u32 bit;
+};
+
+static struct reg_and_bit
+get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
+		const i915_reg_t *regs, const unsigned int num)
+{
+	const unsigned int class = engine->class;
+	struct reg_and_bit rb = { };
+
+	if (drm_WARN_ON_ONCE(&engine->i915->drm,
+			     class >= num || !regs[class].reg))
+		return rb;
+
+	rb.reg = regs[class];
+	if (gen8 && class == VIDEO_DECODE_CLASS)
+		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
+	else
+		rb.bit = engine->instance;
+
+	rb.bit = BIT(rb.bit);
+
+	return rb;
+}
+
+/* CVE-2022-0330 - kpatch gathered definitions */
+#include <linux/livepatch.h>
+#define KLP_CVE_2022_0330_MUTEX 0x2022033000000001
+#define GEN8_RTCR		_MMIO(0x4260)
+#define GEN8_M1TCR		_MMIO(0x4264)
+#define GEN8_M2TCR		_MMIO(0x4268)
+#define GEN8_BTCR		_MMIO(0x426c)
+#define GEN8_VTCR		_MMIO(0x4270)
+#define GEN12_GFX_TLB_INV_CR	_MMIO(0xced8)
+#define GEN12_VD_TLB_INV_CR	_MMIO(0xcedc)
+#define GEN12_VE_TLB_INV_CR	_MMIO(0xcee0)
+#define GEN12_BLT_TLB_INV_CR	_MMIO(0xcee4)
+
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+					enum forcewake_domains domains);
+
+static int tlb_invalidate_lock_ctor(void *obj, void *shadow_data, void *ctor_data)
+{
+	struct mutex *m = shadow_data;
+	mutex_init(m);
+
+	return 0;
+}
+
+void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+{
+	static const i915_reg_t gen8_regs[] = {
+		[RENDER_CLASS]			= GEN8_RTCR,
+		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR, /* , GEN8_M2TCR */
+		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
+		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
+	};
+	static const i915_reg_t gen12_regs[] = {
+		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
+		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
+		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
+		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
+	};
+	struct drm_i915_private *i915 = gt->i915;
+	struct intel_uncore *uncore = gt->uncore;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	const i915_reg_t *regs;
+	unsigned int num = 0;
+	struct mutex *tlb_invalidate_lock;
+
+	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+		return;
+
+	if (INTEL_GEN(i915) == 12) {
+		regs = gen12_regs;
+		num = ARRAY_SIZE(gen12_regs);
+	} else if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) <= 11) {
+		regs = gen8_regs;
+		num = ARRAY_SIZE(gen8_regs);
+	} else if (INTEL_GEN(i915) < 8) {
+		return;
+	}
+
+	if (drm_WARN_ONCE(&i915->drm, !num,
+			  "Platform does not implement TLB invalidation!"))
+		return;
+
+	GEM_TRACE("\n");
+
+	assert_rpm_wakelock_held(&i915->runtime_pm);
+
+	tlb_invalidate_lock = klp_shadow_get_or_alloc(i915, KLP_CVE_2022_0330_MUTEX,
+						      sizeof(*tlb_invalidate_lock), GFP_KERNEL,
+						      tlb_invalidate_lock_ctor, NULL);
+	if (tlb_invalidate_lock) {
+		mutex_lock(tlb_invalidate_lock);
+		intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+		for_each_engine(engine, gt, id) {
+			/*
+			 * HW architecture suggest typical invalidation time at 40us,
+			 * with pessimistic cases up to 100us and a recommendation to
+			 * cap at 1ms. We go a bit higher just in case.
+			 */
+			const unsigned int timeout_us = 100;
+			const unsigned int timeout_ms = 4;
+			struct reg_and_bit rb;
+
+			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+			if (!i915_mmio_reg_offset(rb.reg))
+				continue;
+
+			intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+			if (__intel_wait_for_register_fw(uncore,
+							 rb.reg, rb.bit, 0,
+							 timeout_us, timeout_ms,
+							 NULL))
+				drm_err_ratelimited(&gt->i915->drm,
+						    "%s TLB invalidation did not complete in %ums!\n",
+						    engine->name, timeout_ms);
+		}
+
+		intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
+		mutex_unlock(tlb_invalidate_lock);
+	}
+}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 92668bcbece0..31b298618e7a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -957,6 +957,10 @@ void i915_driver_remove(struct drm_i915_private *i915)
 	enable_rpm_wakeref_asserts(&i915->runtime_pm);
 }
 
+/* CVE-2022-0330 - kpatch gathered definitions */
+#include <linux/livepatch.h>
+#define KLP_CVE_2022_0330_MUTEX 0x2022033000000001
+
 static void i915_driver_release(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -979,6 +983,7 @@ static void i915_driver_release(struct drm_device *dev)
 	intel_runtime_pm_driver_release(rpm);
 
 	i915_driver_late_release(dev_priv);
+	klp_shadow_free(dev_priv, KLP_CVE_2022_0330_MUTEX, NULL);
 }
 
 static int i915_driver_open(struct drm_device *dev, struct drm_file *file)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index caa9b041616b..8b2f1c8b2170 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -362,6 +362,9 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
 	return err;
 }
 
+/* CVE-2022-0330 - kpatch gathered definitions */
+#define I915_BO_WAS_BOUND_BIT 4
+
 /**
  * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
  * @vma: VMA to map
@@ -439,6 +442,9 @@ int i915_vma_bind(struct i915_vma *vma,
 		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
 	}
 
+	if (vma->obj)
+		set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+
 	atomic_or(bind_flags, &vma->flags);
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 9ac501bcfdad..9eb5d9e8e5a8 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -694,7 +694,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore,
 }
 
 static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
-					 enum forcewake_domains fw_domains)
+					 enum forcewake_domains fw_domains,
+					 bool delayed)
 {
 	struct intel_uncore_forcewake_domain *domain;
 	unsigned int tmp;
@@ -709,7 +710,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
 			continue;
 		}
 
-		uncore->funcs.force_wake_put(uncore, domain->mask);
+		if (delayed &&
+		    !(domain->uncore->fw_domains_timer & domain->mask))
+			fw_domain_arm_timer(domain);
+		else
+			uncore->funcs.force_wake_put(uncore, domain->mask);
 	}
 }
 
@@ -730,7 +735,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
 		return;
 
 	spin_lock_irqsave(&uncore->lock, irqflags);
-	__intel_uncore_forcewake_put(uncore, fw_domains);
+	__intel_uncore_forcewake_put(uncore, fw_domains, false);
+	spin_unlock_irqrestore(&uncore->lock, irqflags);
+}
+
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+					enum forcewake_domains fw_domains)
+{
+	unsigned long irqflags;
+
+	if (!uncore->funcs.force_wake_put)
+		return;
+
+	spin_lock_irqsave(&uncore->lock, irqflags);
+	__intel_uncore_forcewake_put(uncore, fw_domains, true);
 	spin_unlock_irqrestore(&uncore->lock, irqflags);
 }
 
@@ -772,7 +790,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore,
 	if (!uncore->funcs.force_wake_put)
 		return;
 
-	__intel_uncore_forcewake_put(uncore, fw_domains);
+	__intel_uncore_forcewake_put(uncore, fw_domains, false);
 }
 
 void assert_forcewakes_inactive(struct intel_uncore *uncore)
-- 
2.34.1
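For readers unfamiliar with livepatch shadow variables, which the
"Modifications" note relies on to avoid growing struct intel_gt, the
lifecycle used by the hunks above is roughly the following. This is a
schematic sketch, not part of the patch; example_flush() and
example_release() are invented names standing in for the real callers
(intel_gt_invalidate_tlbs() and i915_driver_release()):

    #include <linux/livepatch.h>
    #include <linux/mutex.h>
    #include <linux/gfp.h>

    #define KLP_CVE_2022_0330_MUTEX 0x2022033000000001

    /* Constructor: runs once, when the shadow variable is first allocated. */
    static int tlb_lock_ctor(void *obj, void *shadow_data, void *ctor_data)
    {
            mutex_init((struct mutex *)shadow_data);
            return 0;
    }

    static void example_flush(void *i915 /* key object, here the device */)
    {
            /* Look up -- or allocate and construct -- a mutex keyed by
             * (i915, id) instead of adding a field to struct intel_gt. */
            struct mutex *lock;

            lock = klp_shadow_get_or_alloc(i915, KLP_CVE_2022_0330_MUTEX,
                                           sizeof(*lock), GFP_KERNEL,
                                           tlb_lock_ctor, NULL);
            if (!lock)
                    return;

            mutex_lock(lock);
            /* ... serialized TLB invalidation goes here ... */
            mutex_unlock(lock);
    }

    static void example_release(void *i915)
    {
            /* Detach and free the shadow variable when the driver goes away. */
            klp_shadow_free(i915, KLP_CVE_2022_0330_MUTEX, NULL);
    }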