linux_dsm_epyc7002/drivers/gpu/drm/i915/intel_uc.c
Michal Wajdeczko bf67ce62a0 drm/i915/guc: Don't forget to free GuC error log
We're freeing the GuC error log in uc_fini_hw(), matching the
corresponding uc_init_hw(), but we missed the point that this log
object is captured (an extra reference is taken) on the error path,
so in case of a failure in uc_init_hw() we will leak this object
because uc_fini_hw() is never called.

If we free this log object as part of the late uC cleanup, where we
also release the other firmware objects, we can avoid this BUG (a
sketch of the reference flow follows the call traces below):

[70841.001413] BUG drm_i915_gem_object (Tainted: G     U  W       ): Objects remaining in drm_i915_gem_object on __kmem_cache_shutdown()
[70841.001436] INFO: Slab 0x00000000c94e41af objects=21 used=1 fp=0x000000001d60c40a flags=0x8000000000008100

[70841.001466] Call Trace:
[70841.001471]  dump_stack+0x5e/0x8e
[70841.001476]  slab_err+0x99/0xb0
[70841.001483]  ? __slab_alloc.isra.24.constprop.29+0x62/0x70
[70841.001491]  ? __kmalloc+0x1f5/0x320
[70841.001497]  __kmem_cache_shutdown+0x18b/0x400
[70841.001505]  shutdown_cache+0x13/0x1c0
[70841.001511]  kmem_cache_destroy+0x1c2/0x240
[70841.001517]  ? __mutex_unlock_slowpath+0x38/0x270
[70841.001559]  i915_gem_load_cleanup+0xbc/0x130 [i915]
[70841.001595]  i915_driver_cleanup_early+0x11/0x60 [i915]
[70841.001630]  i915_driver_load+0x708/0x1720 [i915]
[70841.001638]  ? trace_hardirqs_on_caller+0xe2/0x1c0
[70841.001673]  i915_pci_probe+0x2d/0x90 [i915]
[70841.001680]  pci_device_probe+0x9c/0x120
[70841.001687]  driver_probe_device+0x2a9/0x490
[70841.001694]  __driver_attach+0xd9/0xe0
[70841.001700]  ? driver_probe_device+0x490/0x490
[70841.001705]  bus_for_each_dev+0x57/0x90
[70841.001712]  bus_add_driver+0x1eb/0x260
[70841.001717]  ? 0xffffffffa0685000
[70841.001723]  driver_register+0x52/0xc0
[70841.001728]  ? 0xffffffffa0685000
[70841.001733]  do_one_initcall+0x39/0x170
[70841.001739]  ? rcu_read_lock_sched_held+0x6f/0x80
[70841.001746]  ? kmem_cache_alloc_trace+0x27b/0x2e0
[70841.001753]  do_init_module+0x56/0x1ec
[70841.001759]  load_module+0x219e/0x2550
[70841.001766]  ? vfs_read+0x121/0x140
[70841.001774]  ? SyS_finit_module+0xa5/0xe0
[70841.001779]  SyS_finit_module+0xa5/0xe0
[70841.001788]  entry_SYSCALL_64_fastpath+0x22/0x8f

[70841.001806] INFO: Object 0x00000000eab7ed96 @offset=6208
[70841.001850] INFO: Allocated in i915_gem_object_create.part.32+0x1f/0x260 [i915] age=38 cpu=0 pid=2708
[70841.001861] 	kmem_cache_alloc+0x23d/0x2d0
[70841.001897] 	i915_gem_object_create.part.32+0x1f/0x260 [i915]
[70841.001937] 	intel_guc_allocate_vma+0x15/0x100 [i915]
[70841.001977] 	intel_guc_log_create+0x34/0x1c0 [i915]
[70841.002014] 	intel_guc_init+0x5a/0x100 [i915]
[70841.002051] 	intel_uc_init+0x3e/0xb0 [i915]
[70841.002089] 	i915_gem_init+0x18e/0x540 [i915]
[70841.002123] 	i915_driver_load+0xa7a/0x1720 [i915]
[70841.002159] 	i915_pci_probe+0x2d/0x90 [i915]
[70841.002165] 	pci_device_probe+0x9c/0x120
[70841.002171] 	driver_probe_device+0x2a9/0x490
[70841.002177] 	__driver_attach+0xd9/0xe0
[70841.002182] 	bus_for_each_dev+0x57/0x90
[70841.002188] 	bus_add_driver+0x1eb/0x260
[70841.002193] 	driver_register+0x52/0xc0
[70841.002198] 	do_one_initcall+0x39/0x170
[70841.002462] kmem_cache_destroy drm_i915_gem_object: Slab cache still has objects

[70841.002491] Call Trace:
[70841.002497]  dump_stack+0x5e/0x8e
[70841.002503]  kmem_cache_destroy+0x1e0/0x240
[70841.002509]  ? __mutex_unlock_slowpath+0x38/0x270
[70841.002551]  i915_gem_load_cleanup+0xbc/0x130 [i915]
[70841.002586]  i915_driver_cleanup_early+0x11/0x60 [i915]
[70841.002621]  i915_driver_load+0x708/0x1720 [i915]
[70841.002629]  ? trace_hardirqs_on_caller+0xe2/0x1c0
[70841.002664]  i915_pci_probe+0x2d/0x90 [i915]
[70841.002671]  pci_device_probe+0x9c/0x120
[70841.002678]  driver_probe_device+0x2a9/0x490
[70841.002684]  __driver_attach+0xd9/0xe0
[70841.002690]  ? driver_probe_device+0x490/0x490
[70841.002696]  bus_for_each_dev+0x57/0x90
[70841.002702]  bus_add_driver+0x1eb/0x260
[70841.002708]  ? 0xffffffffa0685000
[70841.002713]  driver_register+0x52/0xc0
[70841.002719]  ? 0xffffffffa0685000
[70841.002724]  do_one_initcall+0x39/0x170
[70841.002731]  ? rcu_read_lock_sched_held+0x6f/0x80
[70841.002737]  ? kmem_cache_alloc_trace+0x27b/0x2e0
[70841.002745]  do_init_module+0x56/0x1ec
[70841.002751]  load_module+0x219e/0x2550
[70841.002758]  ? vfs_read+0x121/0x140
[70841.002766]  ? SyS_finit_module+0xa5/0xe0
[70841.002772]  SyS_finit_module+0xa5/0xe0
[70841.002781]  entry_SYSCALL_64_fastpath+0x22/0x8f
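
The reference flow behind the fix, as a simplified sketch (not the
literal diff; the real functions are in the code below):

    intel_uc_init_hw() error path:
        guc_capture_load_err_log(guc)
            guc->load_err_log = i915_gem_object_get(guc->log.vma->obj)

    before: the matching put lived only in intel_uc_fini_hw()
        guc_free_load_err_log(guc)
            i915_gem_object_put(guc->load_err_log)
        -> never reached when intel_uc_init_hw() itself fails, so the
           extra reference leaks

    after: the put is done from intel_uc_fini_fw(), which is part of
        the late uC cleanup and runs even after a failed
        intel_uc_init_hw()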

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180131173241.19704-2-michal.wajdeczko@intel.com
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
2018-02-01 12:33:57 +00:00

/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "intel_uc.h"
#include "intel_guc_submission.h"
#include "intel_guc.h"
#include "i915_drv.h"

static void guc_free_load_err_log(struct intel_guc *guc);

/* Reset GuC providing us with fresh state for both GuC and HuC.
 */
static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv)
{
	int ret;
	u32 guc_status;

	ret = intel_reset_guc(dev_priv);
	if (ret) {
		DRM_ERROR("Failed to reset GuC, ret = %d\n", ret);
		return ret;
	}

	guc_status = I915_READ(GUC_STATUS);
	WARN(!(guc_status & GS_MIA_IN_RESET),
	     "GuC status: 0x%x, MIA core expected to be in reset\n",
	     guc_status);

	return ret;
}

static int __get_platform_enable_guc(struct drm_i915_private *dev_priv)
{
	struct intel_uc_fw *guc_fw = &dev_priv->guc.fw;
	struct intel_uc_fw *huc_fw = &dev_priv->huc.fw;
	int enable_guc = 0;

	/* Default is to enable GuC/HuC if we know their firmwares */
	if (intel_uc_fw_is_selected(guc_fw))
		enable_guc |= ENABLE_GUC_SUBMISSION;
	if (intel_uc_fw_is_selected(huc_fw))
		enable_guc |= ENABLE_GUC_LOAD_HUC;

	/* Any platform specific fine-tuning can be done here */

	return enable_guc;
}

static int __get_default_guc_log_level(struct drm_i915_private *dev_priv)
{
	int guc_log_level = 0; /* disabled */

	/* Enable if we're running on platform with GuC and debug config */
	if (HAS_GUC(dev_priv) && intel_uc_is_using_guc() &&
	    (IS_ENABLED(CONFIG_DRM_I915_DEBUG) ||
	     IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)))
		guc_log_level = 1 + GUC_LOG_VERBOSITY_MAX;

	/* Any platform specific fine-tuning can be done here */

	return guc_log_level;
}

/**
 * intel_uc_sanitize_options - sanitize uC related modparam options
 * @dev_priv: device private
 *
 * In case of the "enable_guc" option, this function will attempt to modify
 * it only if it was initially set to "auto(-1)". The default value for this
 * modparam varies between platforms and is hardcoded in driver code.
 * Any other modparam value is only checked against the availability of the
 * related hardware or firmware definitions.
 *
 * In case of the "guc_log_level" option, this function will attempt to modify
 * it only if it was initially set to "auto(-1)", or if the initial value was
 * "enable(1..4)" on platforms without the GuC. The default value for this
 * modparam varies between platforms and is usually set to "disable(0)",
 * unless the GuC is enabled on the given platform and the driver is compiled
 * with a debug config, in which case this modparam defaults to "enable(1..4)".
 */
void intel_uc_sanitize_options(struct drm_i915_private *dev_priv)
{
	struct intel_uc_fw *guc_fw = &dev_priv->guc.fw;
	struct intel_uc_fw *huc_fw = &dev_priv->huc.fw;

	/* A negative value means "use platform default" */
	if (i915_modparams.enable_guc < 0)
		i915_modparams.enable_guc = __get_platform_enable_guc(dev_priv);

	DRM_DEBUG_DRIVER("enable_guc=%d (submission:%s huc:%s)\n",
			 i915_modparams.enable_guc,
			 yesno(intel_uc_is_using_guc_submission()),
			 yesno(intel_uc_is_using_huc()));

	/* Verify GuC firmware availability */
	if (intel_uc_is_using_guc() && !intel_uc_fw_is_selected(guc_fw)) {
		DRM_WARN("Incompatible option detected: %s=%d, %s!\n",
			 "enable_guc", i915_modparams.enable_guc,
			 !HAS_GUC(dev_priv) ? "no GuC hardware" :
					      "no GuC firmware");
	}

	/* Verify HuC firmware availability */
	if (intel_uc_is_using_huc() && !intel_uc_fw_is_selected(huc_fw)) {
		DRM_WARN("Incompatible option detected: %s=%d, %s!\n",
			 "enable_guc", i915_modparams.enable_guc,
			 !HAS_HUC(dev_priv) ? "no HuC hardware" :
					      "no HuC firmware");
	}

	/* A negative value means "use platform/config default" */
	if (i915_modparams.guc_log_level < 0)
		i915_modparams.guc_log_level =
			__get_default_guc_log_level(dev_priv);

	if (i915_modparams.guc_log_level > 0 && !intel_uc_is_using_guc()) {
		DRM_WARN("Incompatible option detected: %s=%d, %s!\n",
			 "guc_log_level", i915_modparams.guc_log_level,
			 !HAS_GUC(dev_priv) ? "no GuC hardware" :
					      "GuC not enabled");
		i915_modparams.guc_log_level = 0;
	}

	if (i915_modparams.guc_log_level > 1 + GUC_LOG_VERBOSITY_MAX) {
		DRM_WARN("Incompatible option detected: %s=%d, %s!\n",
			 "guc_log_level", i915_modparams.guc_log_level,
			 "verbosity too high");
		i915_modparams.guc_log_level = 1 + GUC_LOG_VERBOSITY_MAX;
	}

	DRM_DEBUG_DRIVER("guc_log_level=%d (enabled:%s verbosity:%d)\n",
			 i915_modparams.guc_log_level,
			 yesno(i915_modparams.guc_log_level),
			 i915_modparams.guc_log_level - 1);

	/* Make sure that sanitization was done */
	GEM_BUG_ON(i915_modparams.enable_guc < 0);
	GEM_BUG_ON(i915_modparams.guc_log_level < 0);
}

void intel_uc_init_early(struct drm_i915_private *dev_priv)
{
	intel_guc_init_early(&dev_priv->guc);
	intel_huc_init_early(&dev_priv->huc);
}

void intel_uc_init_fw(struct drm_i915_private *dev_priv)
{
	if (!USES_GUC(dev_priv))
		return;

	if (USES_HUC(dev_priv))
		intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw);

	intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw);
}

void intel_uc_fini_fw(struct drm_i915_private *dev_priv)
{
	if (!USES_GUC(dev_priv))
		return;

	intel_uc_fw_fini(&dev_priv->guc.fw);

	if (USES_HUC(dev_priv))
		intel_uc_fw_fini(&dev_priv->huc.fw);

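	/*
	 * Also release the GuC load error log captured by
	 * guc_capture_load_err_log(); this path runs even when
	 * intel_uc_init_hw() failed and intel_uc_fini_hw() was never called.
	 */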
	guc_free_load_err_log(&dev_priv->guc);
}

/**
 * intel_uc_init_mmio - setup uC MMIO access
 *
 * @dev_priv: device private
 *
 * Setup minimal state necessary for MMIO accesses later in the
 * initialization sequence.
 */
void intel_uc_init_mmio(struct drm_i915_private *dev_priv)
{
	intel_guc_init_send_regs(&dev_priv->guc);
}

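/*
 * On a failed GuC load, take an extra reference to the GuC log buffer object
 * so its contents can still be examined later; the reference is dropped by
 * guc_free_load_err_log().
 */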
static void guc_capture_load_err_log(struct intel_guc *guc)
{
	if (!guc->log.vma || !i915_modparams.guc_log_level)
		return;

	if (!guc->load_err_log)
		guc->load_err_log = i915_gem_object_get(guc->log.vma->obj);

	return;
}

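/* Drop the reference taken by guc_capture_load_err_log(), if any */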
static void guc_free_load_err_log(struct intel_guc *guc)
{
	if (guc->load_err_log)
		i915_gem_object_put(guc->load_err_log);
}

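/*
 * Route GuC communication over the CT (command transport) buffers when the
 * platform supports them, otherwise fall back to MMIO-based send.
 */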
static int guc_enable_communication(struct intel_guc *guc)
{
	struct drm_i915_private *dev_priv = guc_to_i915(guc);

	if (HAS_GUC_CT(dev_priv))
		return intel_guc_enable_ct(guc);

	guc->send = intel_guc_send_mmio;
	return 0;
}

static void guc_disable_communication(struct intel_guc *guc)
{
	struct drm_i915_private *dev_priv = guc_to_i915(guc);

	if (HAS_GUC_CT(dev_priv))
		intel_guc_disable_ct(guc);

	guc->send = intel_guc_send_nop;
}

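/* Allocate the GuC workqueues and the relay channel used for the GuC log */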
int intel_uc_init_misc(struct drm_i915_private *dev_priv)
{
	struct intel_guc *guc = &dev_priv->guc;
	int ret;

	if (!USES_GUC(dev_priv))
		return 0;

	ret = intel_guc_init_wq(guc);
	if (ret) {
		DRM_ERROR("Couldn't allocate workqueues for GuC\n");
		goto err;
	}

	ret = intel_guc_log_relay_create(guc);
	if (ret) {
		DRM_ERROR("Couldn't allocate relay for GuC log\n");
		goto err_relay;
	}

	return 0;

err_relay:
	intel_guc_fini_wq(guc);
err:
	return ret;
}

void intel_uc_fini_misc(struct drm_i915_private *dev_priv)
{
	struct intel_guc *guc = &dev_priv->guc;

	if (!USES_GUC(dev_priv))
		return;

	intel_guc_fini_wq(guc);

	intel_guc_log_relay_destroy(guc);
}

int intel_uc_init(struct drm_i915_private *dev_priv)
{
	struct intel_guc *guc = &dev_priv->guc;
	int ret;

	if (!USES_GUC(dev_priv))
		return 0;

	if (!HAS_GUC(dev_priv))
		return -ENODEV;

	ret = intel_guc_init(guc);
	if (ret)
		return ret;

	if (USES_GUC_SUBMISSION(dev_priv)) {
		/*
		 * This is stuff we need to have available at fw load time
		 * if we are planning to enable submission later
		 */
		ret = intel_guc_submission_init(guc);
		if (ret) {
			intel_guc_fini(guc);
			return ret;
		}
	}

	return 0;
}

void intel_uc_fini(struct drm_i915_private *dev_priv)
{
	struct intel_guc *guc = &dev_priv->guc;

	if (!USES_GUC(dev_priv))
		return;

	GEM_BUG_ON(!HAS_GUC(dev_priv));

	if (USES_GUC_SUBMISSION(dev_priv))
		intel_guc_submission_fini(guc);

	intel_guc_fini(guc);
}

int intel_uc_init_hw(struct drm_i915_private *dev_priv)
{
	struct intel_guc *guc = &dev_priv->guc;
	struct intel_huc *huc = &dev_priv->huc;
	int ret, attempts;

	if (!USES_GUC(dev_priv))
		return 0;

	GEM_BUG_ON(!HAS_GUC(dev_priv));

	guc_disable_communication(guc);
	gen9_reset_guc_interrupts(dev_priv);

	/* init WOPCM */
	I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
	I915_WRITE(DMA_GUC_WOPCM_OFFSET,
		   GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC);

	/* WaEnableuKernelHeaderValidFix:skl */
	/* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
	if (IS_GEN9(dev_priv))
		attempts = 3;
	else
		attempts = 1;

	while (attempts--) {
		/*
		 * Always reset the GuC just before (re)loading, so
		 * that the state and timing are fairly predictable
		 */
		ret = __intel_uc_reset_hw(dev_priv);
		if (ret)
			goto err_out;

		if (USES_HUC(dev_priv)) {
			ret = intel_huc_init_hw(huc);
			if (ret)
				goto err_out;
		}

		intel_guc_init_params(guc);
		ret = intel_guc_fw_upload(guc);
		if (ret == 0 || ret != -EAGAIN)
			break;

		DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and "
				 "retry %d more time(s)\n", ret, attempts);
	}

	/* Did we succeed or run out of retries? */
	if (ret)
		goto err_log_capture;

	ret = guc_enable_communication(guc);
	if (ret)
		goto err_log_capture;

	if (USES_HUC(dev_priv)) {
		ret = intel_huc_auth(huc);
		if (ret)
			goto err_communication;
	}

	if (USES_GUC_SUBMISSION(dev_priv)) {
		if (i915_modparams.guc_log_level)
			gen9_enable_guc_interrupts(dev_priv);

		ret = intel_guc_submission_enable(guc);
		if (ret)
			goto err_interrupts;
	}

	dev_info(dev_priv->drm.dev, "GuC firmware version %u.%u\n",
		 guc->fw.major_ver_found, guc->fw.minor_ver_found);
	dev_info(dev_priv->drm.dev, "GuC submission %s\n",
		 enableddisabled(USES_GUC_SUBMISSION(dev_priv)));
	dev_info(dev_priv->drm.dev, "HuC %s\n",
		 enableddisabled(USES_HUC(dev_priv)));

	return 0;

	/*
	 * We've failed to load the firmware :(
	 */
err_interrupts:
	gen9_disable_guc_interrupts(dev_priv);
err_communication:
	guc_disable_communication(guc);
err_log_capture:
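	/*
	 * Keep a reference to the GuC log so that the load error can be
	 * examined later; it is released from intel_uc_fini_fw().
	 */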
	guc_capture_load_err_log(guc);
err_out:
	/*
	 * Note that there is no fallback as either user explicitly asked for
	 * the GuC or driver default option was to run with the GuC enabled.
	 */
	if (GEM_WARN_ON(ret == -EIO))
		ret = -EINVAL;

	dev_err(dev_priv->drm.dev, "GuC initialization failed %d\n", ret);
	return ret;
}

void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
{
	struct intel_guc *guc = &dev_priv->guc;

	if (!USES_GUC(dev_priv))
		return;

	GEM_BUG_ON(!HAS_GUC(dev_priv));

	if (USES_GUC_SUBMISSION(dev_priv))
		intel_guc_submission_disable(guc);

	guc_disable_communication(guc);

	if (USES_GUC_SUBMISSION(dev_priv))
		gen9_disable_guc_interrupts(dev_priv);
}