linux_dsm_epyc7002/drivers/gpu/drm/i915/i915_dma.c

1395 lines
40 KiB
C
Raw Normal View History

/* i915_dma.c -- DMA support for the I915 -*- linux-c -*-
*/
/*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_legacy.h>
#include "intel_drv.h"
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include <linux/pci.h>
#include <linux/console.h>
#include <linux/vt.h>
#include <linux/vgaarb.h>
#include <linux/acpi.h>
#include <linux/pnp.h>
vga_switcheroo: initial implementation (v15) Many new laptops now come with 2 gpus, one to be used for low power modes and one for gaming/on-ac applications. These GPUs are typically wired to the laptop panel and VGA ports via a multiplexer unit which is controlled via ACPI methods. 4 combinations of systems typically exist - with 2 ACPI methods. Intel/ATI - Lenovo W500/T500 - use ATPX ACPI method ATI/ATI - some ASUS - use ATPX ACPI Method Intel/Nvidia - - use _DSM ACPI method Nvidia/Nvidia - - use _DSM ACPI method. TODO: This patch adds support for the ATPX method and initial bits for the _DSM methods that need to written by someone with access to the hardware. Add a proper non-debugfs interface - need to get some proper testing first. v2: add power up/down support for both devices on W500 puts i915/radeon into D3 and cuts power to radeon. v3: redo probing methods, no DMI list, drm devices call to register with switcheroo, it tries to find an ATPX method on any device and once there is two devices + ATPX it inits the switcher. v4: ATPX msg handling using buffers - should work on more machines v5: rearchitect after more mjg59 discussion - move ATPX handling to radeon driver. v6: add file headers + initial nouveau bits (to be filled out). v7: merge delayed switcher code. v8: avoid suspend/resume of gpu that is off v9: rearchitect - mjg59 is always right. - move all ATPX code to radeon, should allow simpler DSM also proper ATRM handling v10: add ATRM support for radeon BIOS, add mutex to lock vgasr_priv v11: fix bug in resuming Intel for 2nd time. v12: start fixing up nvidia code blindly. v13: blindly guess at finishing nvidia code v14: remove radeon audio hacks - fix up intel resume more like upstream v15: clean up printks + remove unnecessary igd/dis pointers mount debugfs /sys/kernel/debug/vgaswitcheroo/switch - should exist if ATPX detected + 2 cards. DIS - immediate change to discrete IGD - immediate change to IGD DDIS - delayed change to discrete DIGD - delayed change to IGD ON - turn on not in use OFF - turn off not in use Tested on W500 (Intel/ATI) and T500 (Intel/ATI) Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-01 12:38:10 +07:00
#include <linux/vga_switcheroo.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
#include <linux/slab.h>
#include <acpi/video.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>
#include <linux/oom.h>
static int i915_getparam(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
struct drm_i915_private *dev_priv = dev->dev_private;
drm_i915_getparam_t *param = data;
int value;
switch (param->param) {
case I915_PARAM_IRQ_ACTIVE:
case I915_PARAM_ALLOW_BATCHBUFFER:
case I915_PARAM_LAST_DISPATCH:
/* Reject all old ums/dri params. */
return -ENODEV;
case I915_PARAM_CHIPSET_ID:
value = dev->pdev->device;
break;
case I915_PARAM_REVISION:
value = dev->pdev->revision;
break;
case I915_PARAM_HAS_GEM:
value = 1;
break;
case I915_PARAM_NUM_FENCES_AVAIL:
value = dev_priv->num_fence_regs;
break;
case I915_PARAM_HAS_OVERLAY:
value = dev_priv->overlay ? 1 : 0;
break;
case I915_PARAM_HAS_PAGEFLIPPING:
value = 1;
break;
case I915_PARAM_HAS_EXECBUF2:
/* depends on GEM */
value = 1;
break;
case I915_PARAM_HAS_BSD:
value = intel_engine_initialized(&dev_priv->engine[VCS]);
break;
case I915_PARAM_HAS_BLT:
value = intel_engine_initialized(&dev_priv->engine[BCS]);
break;
case I915_PARAM_HAS_VEBOX:
value = intel_engine_initialized(&dev_priv->engine[VECS]);
break;
case I915_PARAM_HAS_BSD2:
value = intel_engine_initialized(&dev_priv->engine[VCS2]);
break;
case I915_PARAM_HAS_RELAXED_FENCING:
value = 1;
break;
case I915_PARAM_HAS_COHERENT_RINGS:
value = 1;
break;
case I915_PARAM_HAS_EXEC_CONSTANTS:
value = INTEL_INFO(dev)->gen >= 4;
break;
case I915_PARAM_HAS_RELAXED_DELTA:
value = 1;
break;
case I915_PARAM_HAS_GEN7_SOL_RESET:
value = 1;
break;
case I915_PARAM_HAS_LLC:
value = HAS_LLC(dev);
break;
case I915_PARAM_HAS_WT:
value = HAS_WT(dev);
break;
case I915_PARAM_HAS_ALIASING_PPGTT:
value = USES_PPGTT(dev);
break;
case I915_PARAM_HAS_WAIT_TIMEOUT:
value = 1;
break;
case I915_PARAM_HAS_SEMAPHORES:
value = i915_semaphore_is_enabled(dev);
break;
case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
value = 1;
break;
case I915_PARAM_HAS_SECURE_BATCHES:
value = capable(CAP_SYS_ADMIN);
break;
case I915_PARAM_HAS_PINNED_BATCHES:
value = 1;
break;
case I915_PARAM_HAS_EXEC_NO_RELOC:
value = 1;
break;
case I915_PARAM_HAS_EXEC_HANDLE_LUT:
value = 1;
break;
case I915_PARAM_CMD_PARSER_VERSION:
value = i915_cmd_parser_get_version();
break;
case I915_PARAM_HAS_COHERENT_PHYS_GTT:
value = 1;
drm/i915: Support creation of unbound wc user mappings for objects This patch provides support to create write-combining virtual mappings of GEM object. It intends to provide the same funtionality of 'mmap_gtt' interface without the constraints and contention of a limited aperture space, but requires clients handles the linear to tile conversion on their own. This is for improving the CPU write operation performance, as with such mapping, writes and reads are almost 50% faster than with mmap_gtt. Similar to the GTT mmapping, unlike the regular CPU mmapping, it avoids the cache flush after update from CPU side, when object is passed onto GPU. This type of mapping is specially useful in case of sub-region update, i.e. when only a portion of the object is to be updated. Using a CPU mmap in such cases would normally incur a clflush of the whole object, and using a GTT mmapping would likely require eviction of an active object or fence and thus stall. The write-combining CPU mmap avoids both. To ensure the cache coherency, before using this mapping, the GTT domain has been reused here. This provides the required cache flush if the object is in CPU domain or synchronization against the concurrent rendering. Although the access through an uncached mmap should automatically invalidate the cache lines, this may not be true for non-temporal write instructions and also not all pages of the object may be updated at any given point of time through this mapping. Having a call to get_pages in set_to_gtt_domain function, as added in the earlier patch 'drm/i915: Broaden application of set-domain(GTT)', would guarantee the clflush and so there will be no cachelines holding the data for the object before it is accessed through this map. The drm_i915_gem_mmap structure (for the DRM_I915_GEM_MMAP_IOCTL) has been extended with a new flags field (defaulting to 0 for existent users). In order for userspace to detect the extended ioctl, a new parameter I915_PARAM_MMAP_VERSION has been added for versioning the ioctl interface. v2: Fix error handling, invalid flag detection, renaming (ickle) v3: Rebase to latest drm-intel-nightly codebase The new mmapping is exercised by igt/gem_mmap_wc, igt/gem_concurrent_blit and igt/gem_gtt_speed. Change-Id: Ie883942f9e689525f72fe9a8d3780c3a9faa769a Signed-off-by: Akash Goel <akash.goel@intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-01-02 17:59:30 +07:00
break;
case I915_PARAM_MMAP_VERSION:
value = 1;
break;
case I915_PARAM_SUBSLICE_TOTAL:
value = INTEL_INFO(dev)->subslice_total;
if (!value)
return -ENODEV;
break;
case I915_PARAM_EU_TOTAL:
value = INTEL_INFO(dev)->eu_total;
if (!value)
return -ENODEV;
break;
case I915_PARAM_HAS_GPU_RESET:
value = i915.enable_hangcheck &&
intel_has_gpu_reset(dev);
break;
case I915_PARAM_HAS_RESOURCE_STREAMER:
value = HAS_RESOURCE_STREAMER(dev);
break;
drm/i915: Add soft-pinning API for execbuffer Userspace can pass in an offset that it presumes the object is located at. The kernel will then do its utmost to fit the object into that location. The assumption is that userspace is handling its own object locations (for example along with full-ppgtt) and that the kernel will rarely have to make space for the user's requests. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. (Not published externally) v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction to allow eviction of soft-pinned objects when another soft-pinned object used by a subsequent execbuffer overlaps reported by Michal Winiarski. (Not published externally) v4: Moved soft-pinned objects to the front of ordered_vmas so that they are pinned first after an address conflict happens to avoid repeated conflicts in rare cases (Suggested by Chris Wilson). Expanded comment on drm_i915_gem_exec_object2.offset to cover this new API. v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure buffers are not considered misplaced without the user specifying EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume responsibility for any addressing workarounds. Updated object2.offset field comment again to clarify NO_RELOC case (Chris). checkpatch cleanup. v6: Trivial rebase on latest drm-intel-nightly v7: Catch attempts to pin above the max virtual address size and return EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and EXEC_OBJECT_PINNED flags, user must pass both flags in any attempt to pin something at an offset above 4GB (Chris, Daniel Vetter). Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Akash Goel <akash.goel@intel.com> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> Cc: Michal Winiarski <michal.winiarski@intel.com> Cc: Zou Nanhai <nanhai.zou@intel.com> Cc: Kristian Høgsberg <hoegsberg@gmail.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Reviewed-by: Michel Thierry <michel.thierry@intel.com> Acked-by: PDT Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1449575707-20933-1-git-send-email-thomas.daniel@intel.com
2015-12-08 18:55:07 +07:00
case I915_PARAM_HAS_EXEC_SOFTPIN:
value = 1;
break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
}
if (copy_to_user(param->value, &value, sizeof(int))) {
DRM_ERROR("copy_to_user failed\n");
return -EFAULT;
}
return 0;
}
static int i915_get_bridge_dev(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
dev_priv->bridge_dev = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
if (!dev_priv->bridge_dev) {
DRM_ERROR("bridge device not found\n");
return -1;
}
return 0;
}
#define MCHBAR_I915 0x44
#define MCHBAR_I965 0x48
#define MCHBAR_SIZE (4*4096)
#define DEVEN_REG 0x54
#define DEVEN_MCHBAR_EN (1 << 28)
/* Allocate space for the MCH regs if needed, return nonzero on error */
static int
intel_alloc_mchbar_resource(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
int reg = INTEL_INFO(dev)->gen >= 4 ? MCHBAR_I965 : MCHBAR_I915;
u32 temp_lo, temp_hi = 0;
u64 mchbar_addr;
int ret;
if (INTEL_INFO(dev)->gen >= 4)
pci_read_config_dword(dev_priv->bridge_dev, reg + 4, &temp_hi);
pci_read_config_dword(dev_priv->bridge_dev, reg, &temp_lo);
mchbar_addr = ((u64)temp_hi << 32) | temp_lo;
/* If ACPI doesn't have it, assume we need to allocate it ourselves */
#ifdef CONFIG_PNP
if (mchbar_addr &&
pnp_range_reserved(mchbar_addr, mchbar_addr + MCHBAR_SIZE))
return 0;
#endif
/* Get some space for it */
dev_priv->mch_res.name = "i915 MCHBAR";
dev_priv->mch_res.flags = IORESOURCE_MEM;
ret = pci_bus_alloc_resource(dev_priv->bridge_dev->bus,
&dev_priv->mch_res,
MCHBAR_SIZE, MCHBAR_SIZE,
PCIBIOS_MIN_MEM,
0, pcibios_align_resource,
dev_priv->bridge_dev);
if (ret) {
DRM_DEBUG_DRIVER("failed bus alloc: %d\n", ret);
dev_priv->mch_res.start = 0;
return ret;
}
if (INTEL_INFO(dev)->gen >= 4)
pci_write_config_dword(dev_priv->bridge_dev, reg + 4,
upper_32_bits(dev_priv->mch_res.start));
pci_write_config_dword(dev_priv->bridge_dev, reg,
lower_32_bits(dev_priv->mch_res.start));
return 0;
}
/* Setup MCHBAR if possible, return true if we should disable it again */
static void
intel_setup_mchbar(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
int mchbar_reg = INTEL_INFO(dev)->gen >= 4 ? MCHBAR_I965 : MCHBAR_I915;
u32 temp;
bool enabled;
if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
return;
dev_priv->mchbar_need_disable = false;
if (IS_I915G(dev) || IS_I915GM(dev)) {
pci_read_config_dword(dev_priv->bridge_dev, DEVEN_REG, &temp);
enabled = !!(temp & DEVEN_MCHBAR_EN);
} else {
pci_read_config_dword(dev_priv->bridge_dev, mchbar_reg, &temp);
enabled = temp & 1;
}
/* If it's already enabled, don't have to do anything */
if (enabled)
return;
if (intel_alloc_mchbar_resource(dev))
return;
dev_priv->mchbar_need_disable = true;
/* Space is allocated or reserved, so enable it. */
if (IS_I915G(dev) || IS_I915GM(dev)) {
pci_write_config_dword(dev_priv->bridge_dev, DEVEN_REG,
temp | DEVEN_MCHBAR_EN);
} else {
pci_read_config_dword(dev_priv->bridge_dev, mchbar_reg, &temp);
pci_write_config_dword(dev_priv->bridge_dev, mchbar_reg, temp | 1);
}
}
static void
intel_teardown_mchbar(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
int mchbar_reg = INTEL_INFO(dev)->gen >= 4 ? MCHBAR_I965 : MCHBAR_I915;
u32 temp;
if (dev_priv->mchbar_need_disable) {
if (IS_I915G(dev) || IS_I915GM(dev)) {
pci_read_config_dword(dev_priv->bridge_dev, DEVEN_REG, &temp);
temp &= ~DEVEN_MCHBAR_EN;
pci_write_config_dword(dev_priv->bridge_dev, DEVEN_REG, temp);
} else {
pci_read_config_dword(dev_priv->bridge_dev, mchbar_reg, &temp);
temp &= ~1;
pci_write_config_dword(dev_priv->bridge_dev, mchbar_reg, temp);
}
}
if (dev_priv->mch_res.start)
release_resource(&dev_priv->mch_res);
}
/* true = enable decode, false = disable decoder */
static unsigned int i915_vga_set_decode(void *cookie, bool state)
{
struct drm_device *dev = cookie;
intel_modeset_vga_set_state(dev, state);
if (state)
return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
else
return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}
vga_switcheroo: initial implementation (v15) Many new laptops now come with 2 gpus, one to be used for low power modes and one for gaming/on-ac applications. These GPUs are typically wired to the laptop panel and VGA ports via a multiplexer unit which is controlled via ACPI methods. 4 combinations of systems typically exist - with 2 ACPI methods. Intel/ATI - Lenovo W500/T500 - use ATPX ACPI method ATI/ATI - some ASUS - use ATPX ACPI Method Intel/Nvidia - - use _DSM ACPI method Nvidia/Nvidia - - use _DSM ACPI method. TODO: This patch adds support for the ATPX method and initial bits for the _DSM methods that need to written by someone with access to the hardware. Add a proper non-debugfs interface - need to get some proper testing first. v2: add power up/down support for both devices on W500 puts i915/radeon into D3 and cuts power to radeon. v3: redo probing methods, no DMI list, drm devices call to register with switcheroo, it tries to find an ATPX method on any device and once there is two devices + ATPX it inits the switcher. v4: ATPX msg handling using buffers - should work on more machines v5: rearchitect after more mjg59 discussion - move ATPX handling to radeon driver. v6: add file headers + initial nouveau bits (to be filled out). v7: merge delayed switcher code. v8: avoid suspend/resume of gpu that is off v9: rearchitect - mjg59 is always right. - move all ATPX code to radeon, should allow simpler DSM also proper ATRM handling v10: add ATRM support for radeon BIOS, add mutex to lock vgasr_priv v11: fix bug in resuming Intel for 2nd time. v12: start fixing up nvidia code blindly. v13: blindly guess at finishing nvidia code v14: remove radeon audio hacks - fix up intel resume more like upstream v15: clean up printks + remove unnecessary igd/dis pointers mount debugfs /sys/kernel/debug/vgaswitcheroo/switch - should exist if ATPX detected + 2 cards. DIS - immediate change to discrete IGD - immediate change to IGD DDIS - delayed change to discrete DIGD - delayed change to IGD ON - turn on not in use OFF - turn off not in use Tested on W500 (Intel/ATI) and T500 (Intel/ATI) Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-01 12:38:10 +07:00
static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
{
struct drm_device *dev = pci_get_drvdata(pdev);
pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
vga_switcheroo: initial implementation (v15) Many new laptops now come with 2 gpus, one to be used for low power modes and one for gaming/on-ac applications. These GPUs are typically wired to the laptop panel and VGA ports via a multiplexer unit which is controlled via ACPI methods. 4 combinations of systems typically exist - with 2 ACPI methods. Intel/ATI - Lenovo W500/T500 - use ATPX ACPI method ATI/ATI - some ASUS - use ATPX ACPI Method Intel/Nvidia - - use _DSM ACPI method Nvidia/Nvidia - - use _DSM ACPI method. TODO: This patch adds support for the ATPX method and initial bits for the _DSM methods that need to written by someone with access to the hardware. Add a proper non-debugfs interface - need to get some proper testing first. v2: add power up/down support for both devices on W500 puts i915/radeon into D3 and cuts power to radeon. v3: redo probing methods, no DMI list, drm devices call to register with switcheroo, it tries to find an ATPX method on any device and once there is two devices + ATPX it inits the switcher. v4: ATPX msg handling using buffers - should work on more machines v5: rearchitect after more mjg59 discussion - move ATPX handling to radeon driver. v6: add file headers + initial nouveau bits (to be filled out). v7: merge delayed switcher code. v8: avoid suspend/resume of gpu that is off v9: rearchitect - mjg59 is always right. - move all ATPX code to radeon, should allow simpler DSM also proper ATRM handling v10: add ATRM support for radeon BIOS, add mutex to lock vgasr_priv v11: fix bug in resuming Intel for 2nd time. v12: start fixing up nvidia code blindly. v13: blindly guess at finishing nvidia code v14: remove radeon audio hacks - fix up intel resume more like upstream v15: clean up printks + remove unnecessary igd/dis pointers mount debugfs /sys/kernel/debug/vgaswitcheroo/switch - should exist if ATPX detected + 2 cards. DIS - immediate change to discrete IGD - immediate change to IGD DDIS - delayed change to discrete DIGD - delayed change to IGD ON - turn on not in use OFF - turn off not in use Tested on W500 (Intel/ATI) and T500 (Intel/ATI) Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-01 12:38:10 +07:00
if (state == VGA_SWITCHEROO_ON) {
pr_info("switched on\n");
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
vga_switcheroo: initial implementation (v15) Many new laptops now come with 2 gpus, one to be used for low power modes and one for gaming/on-ac applications. These GPUs are typically wired to the laptop panel and VGA ports via a multiplexer unit which is controlled via ACPI methods. 4 combinations of systems typically exist - with 2 ACPI methods. Intel/ATI - Lenovo W500/T500 - use ATPX ACPI method ATI/ATI - some ASUS - use ATPX ACPI Method Intel/Nvidia - - use _DSM ACPI method Nvidia/Nvidia - - use _DSM ACPI method. TODO: This patch adds support for the ATPX method and initial bits for the _DSM methods that need to written by someone with access to the hardware. Add a proper non-debugfs interface - need to get some proper testing first. v2: add power up/down support for both devices on W500 puts i915/radeon into D3 and cuts power to radeon. v3: redo probing methods, no DMI list, drm devices call to register with switcheroo, it tries to find an ATPX method on any device and once there is two devices + ATPX it inits the switcher. v4: ATPX msg handling using buffers - should work on more machines v5: rearchitect after more mjg59 discussion - move ATPX handling to radeon driver. v6: add file headers + initial nouveau bits (to be filled out). v7: merge delayed switcher code. v8: avoid suspend/resume of gpu that is off v9: rearchitect - mjg59 is always right. - move all ATPX code to radeon, should allow simpler DSM also proper ATRM handling v10: add ATRM support for radeon BIOS, add mutex to lock vgasr_priv v11: fix bug in resuming Intel for 2nd time. v12: start fixing up nvidia code blindly. v13: blindly guess at finishing nvidia code v14: remove radeon audio hacks - fix up intel resume more like upstream v15: clean up printks + remove unnecessary igd/dis pointers mount debugfs /sys/kernel/debug/vgaswitcheroo/switch - should exist if ATPX detected + 2 cards. DIS - immediate change to discrete IGD - immediate change to IGD DDIS - delayed change to discrete DIGD - delayed change to IGD ON - turn on not in use OFF - turn off not in use Tested on W500 (Intel/ATI) and T500 (Intel/ATI) Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-01 12:38:10 +07:00
/* i915 resume handler doesn't set to D0 */
pci_set_power_state(dev->pdev, PCI_D0);
i915_resume_switcheroo(dev);
dev->switch_power_state = DRM_SWITCH_POWER_ON;
vga_switcheroo: initial implementation (v15) Many new laptops now come with 2 gpus, one to be used for low power modes and one for gaming/on-ac applications. These GPUs are typically wired to the laptop panel and VGA ports via a multiplexer unit which is controlled via ACPI methods. 4 combinations of systems typically exist - with 2 ACPI methods. Intel/ATI - Lenovo W500/T500 - use ATPX ACPI method ATI/ATI - some ASUS - use ATPX ACPI Method Intel/Nvidia - - use _DSM ACPI method Nvidia/Nvidia - - use _DSM ACPI method. TODO: This patch adds support for the ATPX method and initial bits for the _DSM methods that need to written by someone with access to the hardware. Add a proper non-debugfs interface - need to get some proper testing first. v2: add power up/down support for both devices on W500 puts i915/radeon into D3 and cuts power to radeon. v3: redo probing methods, no DMI list, drm devices call to register with switcheroo, it tries to find an ATPX method on any device and once there is two devices + ATPX it inits the switcher. v4: ATPX msg handling using buffers - should work on more machines v5: rearchitect after more mjg59 discussion - move ATPX handling to radeon driver. v6: add file headers + initial nouveau bits (to be filled out). v7: merge delayed switcher code. v8: avoid suspend/resume of gpu that is off v9: rearchitect - mjg59 is always right. - move all ATPX code to radeon, should allow simpler DSM also proper ATRM handling v10: add ATRM support for radeon BIOS, add mutex to lock vgasr_priv v11: fix bug in resuming Intel for 2nd time. v12: start fixing up nvidia code blindly. v13: blindly guess at finishing nvidia code v14: remove radeon audio hacks - fix up intel resume more like upstream v15: clean up printks + remove unnecessary igd/dis pointers mount debugfs /sys/kernel/debug/vgaswitcheroo/switch - should exist if ATPX detected + 2 cards. DIS - immediate change to discrete IGD - immediate change to IGD DDIS - delayed change to discrete DIGD - delayed change to IGD ON - turn on not in use OFF - turn off not in use Tested on W500 (Intel/ATI) and T500 (Intel/ATI) Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-01 12:38:10 +07:00
} else {
pr_info("switched off\n");
dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
i915_suspend_switcheroo(dev, pmm);
dev->switch_power_state = DRM_SWITCH_POWER_OFF;
vga_switcheroo: initial implementation (v15) Many new laptops now come with 2 gpus, one to be used for low power modes and one for gaming/on-ac applications. These GPUs are typically wired to the laptop panel and VGA ports via a multiplexer unit which is controlled via ACPI methods. 4 combinations of systems typically exist - with 2 ACPI methods. Intel/ATI - Lenovo W500/T500 - use ATPX ACPI method ATI/ATI - some ASUS - use ATPX ACPI Method Intel/Nvidia - - use _DSM ACPI method Nvidia/Nvidia - - use _DSM ACPI method. TODO: This patch adds support for the ATPX method and initial bits for the _DSM methods that need to written by someone with access to the hardware. Add a proper non-debugfs interface - need to get some proper testing first. v2: add power up/down support for both devices on W500 puts i915/radeon into D3 and cuts power to radeon. v3: redo probing methods, no DMI list, drm devices call to register with switcheroo, it tries to find an ATPX method on any device and once there is two devices + ATPX it inits the switcher. v4: ATPX msg handling using buffers - should work on more machines v5: rearchitect after more mjg59 discussion - move ATPX handling to radeon driver. v6: add file headers + initial nouveau bits (to be filled out). v7: merge delayed switcher code. v8: avoid suspend/resume of gpu that is off v9: rearchitect - mjg59 is always right. - move all ATPX code to radeon, should allow simpler DSM also proper ATRM handling v10: add ATRM support for radeon BIOS, add mutex to lock vgasr_priv v11: fix bug in resuming Intel for 2nd time. v12: start fixing up nvidia code blindly. v13: blindly guess at finishing nvidia code v14: remove radeon audio hacks - fix up intel resume more like upstream v15: clean up printks + remove unnecessary igd/dis pointers mount debugfs /sys/kernel/debug/vgaswitcheroo/switch - should exist if ATPX detected + 2 cards. DIS - immediate change to discrete IGD - immediate change to IGD DDIS - delayed change to discrete DIGD - delayed change to IGD ON - turn on not in use OFF - turn off not in use Tested on W500 (Intel/ATI) and T500 (Intel/ATI) Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-01 12:38:10 +07:00
}
}
static bool i915_switcheroo_can_switch(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
drm: Rip out totally bogus vga_switcheroo->can_switch locking So I just wanted to add a new field to struct drm_device and accidentally stumbled over something. According to comments dev->open_count is protected by dev->count_lock, but that's totally not the case. It's protected by drm_global_mutex. Unfortunately the vga switcheroo callbacks took this comment at face value. The problem is that we can't just take the drm_global_mutex because: - It would lead to a locking inversion with the driver load/unload paths. - It wouldn't actually protect anything, for that we'd need to wrap the entire vga switcheroo code in the drm_global_mutex. And I'm not sure whether that would actually solve anything. What we probably want is a try_to_grab_switcheroo reference kind of thing which is used in the driver's ->open callback. Then we could move all that ->can_switch madness into the vga switcheroo core where it really belongs. But since that would amount to real work take the easy way out and just add a comment. It's definitely not going to make anything worse since doing switcheroo state changes while restarting X just isn't recommended. Even though the delayed switching code does exactly that. v2: - Simplify the ->can_switch implementations more (Thierry) - Fix comment about the dev->open_count locking (Thierry) Cc: Thierry Reding <treding@nvidia.com> Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> (v1) Reviewed-by: Thierry Reding <treding@nvidia.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-11-04 02:46:34 +07:00
/*
* FIXME: open_count is protected by drm_global_mutex but that would lead to
* locking inversion with the driver load path. And the access here is
* completely racy anyway. So don't bother with locking for now.
*/
return dev->open_count == 0;
vga_switcheroo: initial implementation (v15) Many new laptops now come with 2 gpus, one to be used for low power modes and one for gaming/on-ac applications. These GPUs are typically wired to the laptop panel and VGA ports via a multiplexer unit which is controlled via ACPI methods. 4 combinations of systems typically exist - with 2 ACPI methods. Intel/ATI - Lenovo W500/T500 - use ATPX ACPI method ATI/ATI - some ASUS - use ATPX ACPI Method Intel/Nvidia - - use _DSM ACPI method Nvidia/Nvidia - - use _DSM ACPI method. TODO: This patch adds support for the ATPX method and initial bits for the _DSM methods that need to written by someone with access to the hardware. Add a proper non-debugfs interface - need to get some proper testing first. v2: add power up/down support for both devices on W500 puts i915/radeon into D3 and cuts power to radeon. v3: redo probing methods, no DMI list, drm devices call to register with switcheroo, it tries to find an ATPX method on any device and once there is two devices + ATPX it inits the switcher. v4: ATPX msg handling using buffers - should work on more machines v5: rearchitect after more mjg59 discussion - move ATPX handling to radeon driver. v6: add file headers + initial nouveau bits (to be filled out). v7: merge delayed switcher code. v8: avoid suspend/resume of gpu that is off v9: rearchitect - mjg59 is always right. - move all ATPX code to radeon, should allow simpler DSM also proper ATRM handling v10: add ATRM support for radeon BIOS, add mutex to lock vgasr_priv v11: fix bug in resuming Intel for 2nd time. v12: start fixing up nvidia code blindly. v13: blindly guess at finishing nvidia code v14: remove radeon audio hacks - fix up intel resume more like upstream v15: clean up printks + remove unnecessary igd/dis pointers mount debugfs /sys/kernel/debug/vgaswitcheroo/switch - should exist if ATPX detected + 2 cards. DIS - immediate change to discrete IGD - immediate change to IGD DDIS - delayed change to discrete DIGD - delayed change to IGD ON - turn on not in use OFF - turn off not in use Tested on W500 (Intel/ATI) and T500 (Intel/ATI) Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-01 12:38:10 +07:00
}
static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
.set_gpu_state = i915_switcheroo_set_state,
.reprobe = NULL,
.can_switch = i915_switcheroo_can_switch,
};
static int i915_load_modeset_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
int ret;
ret = intel_bios_init(dev_priv);
if (ret)
DRM_INFO("failed to find VBIOS tables\n");
/* If we have > 1 VGA cards, then we need to arbitrate access
* to the common VGA resources.
*
* If we are a secondary display controller (!PCI_DISPLAY_CLASS_VGA),
* then we do not take part in VGA arbitration and the
* vga_client_register() fails with -ENODEV.
*/
Revert "i915: Update VGA arbiter support for newer devices" This reverts commit 81b5c7bc8de3e6f63419139c2fc91bf81dea8a7d. Adding drm/i915 into the vga arbiter chain means that X (in a piece of well-meant paranoia) will do a get/put on the vga decoding around _every_ accel call down into the ddx. Which results in some nice performance disasters [1]. This really breaks userspace, by disabling DRI for everyone, and stops OpenGL from working, this isn't limited to just the i915 but both the integrated and discrete GPUs on multi-gpu systems, in other words this causes untold worlds of pain, Ville tried to come up with a Great Hack to fiddle the required VGA I/O ops behind everyone's back using stop_machine, but that didn't really work out [2]. Given that we're fairly late in the -rc stage for such games let's just revert this all. One thing we might want to keep is to delay the disabling of the vga decoding until the fbdev emulation and the fbcon screen is set up. If we kill vga mem decoding beforehand fbcon can end up with a white square in the top-left corner it tried to save from the vga memory for a seamless transition. And we have bug reports on older platforms which seem to match these symptoms. But again that's something to play around with in -next. References: [1] http://lists.x.org/archives/xorg-devel/2013-September/037763.html References: [2] http://www.spinics.net/lists/intel-gfx/msg34062.html Cc: Alex Williamson <alex.williamson@redhat.com> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Dave Airlie <airlied@redhat.com>
2013-10-11 12:12:04 +07:00
ret = vga_client_register(dev->pdev, dev, NULL, i915_vga_set_decode);
if (ret && ret != -ENODEV)
goto out;
intel_register_dsm_handler();
ret = vga_switcheroo_register_client(dev->pdev, &i915_switcheroo_ops, false);
vga_switcheroo: initial implementation (v15) Many new laptops now come with 2 gpus, one to be used for low power modes and one for gaming/on-ac applications. These GPUs are typically wired to the laptop panel and VGA ports via a multiplexer unit which is controlled via ACPI methods. 4 combinations of systems typically exist - with 2 ACPI methods. Intel/ATI - Lenovo W500/T500 - use ATPX ACPI method ATI/ATI - some ASUS - use ATPX ACPI Method Intel/Nvidia - - use _DSM ACPI method Nvidia/Nvidia - - use _DSM ACPI method. TODO: This patch adds support for the ATPX method and initial bits for the _DSM methods that need to written by someone with access to the hardware. Add a proper non-debugfs interface - need to get some proper testing first. v2: add power up/down support for both devices on W500 puts i915/radeon into D3 and cuts power to radeon. v3: redo probing methods, no DMI list, drm devices call to register with switcheroo, it tries to find an ATPX method on any device and once there is two devices + ATPX it inits the switcher. v4: ATPX msg handling using buffers - should work on more machines v5: rearchitect after more mjg59 discussion - move ATPX handling to radeon driver. v6: add file headers + initial nouveau bits (to be filled out). v7: merge delayed switcher code. v8: avoid suspend/resume of gpu that is off v9: rearchitect - mjg59 is always right. - move all ATPX code to radeon, should allow simpler DSM also proper ATRM handling v10: add ATRM support for radeon BIOS, add mutex to lock vgasr_priv v11: fix bug in resuming Intel for 2nd time. v12: start fixing up nvidia code blindly. v13: blindly guess at finishing nvidia code v14: remove radeon audio hacks - fix up intel resume more like upstream v15: clean up printks + remove unnecessary igd/dis pointers mount debugfs /sys/kernel/debug/vgaswitcheroo/switch - should exist if ATPX detected + 2 cards. DIS - immediate change to discrete IGD - immediate change to IGD DDIS - delayed change to discrete DIGD - delayed change to IGD ON - turn on not in use OFF - turn off not in use Tested on W500 (Intel/ATI) and T500 (Intel/ATI) Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-01 12:38:10 +07:00
if (ret)
goto cleanup_vga_client;
vga_switcheroo: initial implementation (v15) Many new laptops now come with 2 gpus, one to be used for low power modes and one for gaming/on-ac applications. These GPUs are typically wired to the laptop panel and VGA ports via a multiplexer unit which is controlled via ACPI methods. 4 combinations of systems typically exist - with 2 ACPI methods. Intel/ATI - Lenovo W500/T500 - use ATPX ACPI method ATI/ATI - some ASUS - use ATPX ACPI Method Intel/Nvidia - - use _DSM ACPI method Nvidia/Nvidia - - use _DSM ACPI method. TODO: This patch adds support for the ATPX method and initial bits for the _DSM methods that need to written by someone with access to the hardware. Add a proper non-debugfs interface - need to get some proper testing first. v2: add power up/down support for both devices on W500 puts i915/radeon into D3 and cuts power to radeon. v3: redo probing methods, no DMI list, drm devices call to register with switcheroo, it tries to find an ATPX method on any device and once there is two devices + ATPX it inits the switcher. v4: ATPX msg handling using buffers - should work on more machines v5: rearchitect after more mjg59 discussion - move ATPX handling to radeon driver. v6: add file headers + initial nouveau bits (to be filled out). v7: merge delayed switcher code. v8: avoid suspend/resume of gpu that is off v9: rearchitect - mjg59 is always right. - move all ATPX code to radeon, should allow simpler DSM also proper ATRM handling v10: add ATRM support for radeon BIOS, add mutex to lock vgasr_priv v11: fix bug in resuming Intel for 2nd time. v12: start fixing up nvidia code blindly. v13: blindly guess at finishing nvidia code v14: remove radeon audio hacks - fix up intel resume more like upstream v15: clean up printks + remove unnecessary igd/dis pointers mount debugfs /sys/kernel/debug/vgaswitcheroo/switch - should exist if ATPX detected + 2 cards. DIS - immediate change to discrete IGD - immediate change to IGD DDIS - delayed change to discrete DIGD - delayed change to IGD ON - turn on not in use OFF - turn off not in use Tested on W500 (Intel/ATI) and T500 (Intel/ATI) Signed-off-by: Dave Airlie <airlied@redhat.com>
2010-02-01 12:38:10 +07:00
drm/i915/skl: init/uninit display core as part of the HW power domain state We need to initialize the display core part early, before initializing the rest of the display power state. This is also described in the bspec termed "Display initialization sequence". Atm we run this sequence during driver loading after power domain HW state initialization which is too late and during runtime suspend/resume which is unneeded and can interere with DMC functionality which handles HW resources toggled by this init/uninit sequence automatically. The init sequence must be run as the first step of HW power state initialization and during system resume. The uninit sequence must be run during system suspend. To address the above move the init sequence to the initial HW power state setup and the uninit sequence to a new power domains suspend function called during system suspend. As part of the init sequence we also have to reprogram the DMC firmware as it's lost across a system suspend/resume cycle. After this change CD clock initialization during driver loading will happen only later after other dependent HW/SW parts are initialized, while during system resume it will get initialized as the last step of the init sequence. This distinction can be removed by some refactoring of platform independent parts. I left this refactoring out from this series since I didn't want to change non-SKL parts. This is a TODO for later. v2: - fix error path in i915_drm_suspend_late() - don't try to re-program the DMC firmware if it failed to load Signed-off-by: Imre Deak <imre.deak@intel.com> Reviewed-by: Patrik Jakobsson <patrik.jakobsson@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1447774433-20834-1-git-send-email-imre.deak@intel.com
2015-11-17 22:33:53 +07:00
intel_power_domains_init_hw(dev_priv, false);
intel_csr_ucode_init(dev_priv);
ret = intel_irq_install(dev_priv);
if (ret)
goto cleanup_csr;
intel_setup_gmbus(dev);
/* Important: The output setup functions called by modeset_init need
* working irqs for e.g. gmbus and dp aux transfers. */
intel_modeset_init(dev);
intel_guc_ucode_init(dev);
ret = i915_gem_init(dev);
if (ret)
goto cleanup_irq;
intel_modeset_gem_init(dev);
/* Always safe in the mode setting case. */
/* FIXME: do pre/post-mode set stuff in core KMS code */
dev->vblank_disable_allowed = true;
if (INTEL_INFO(dev)->num_pipes == 0)
return 0;
ret = intel_fbdev_init(dev);
if (ret)
goto cleanup_gem;
drm/i915: Fixup hpd irq register setup ordering For GMCH platforms we set up the hpd irq registers in the irq postinstall hook. But since we only enable the irq sources we actually need in PORT_HOTPLUG_EN/STATUS, taking dev_priv->hotplug_supported_mask into account, no hpd interrupt sources is enabled since commit 52d7ecedac3f96fb562cb482c139015372728638 Author: Daniel Vetter <daniel.vetter@ffwll.ch> Date: Sat Dec 1 21:03:22 2012 +0100 drm/i915: reorder setup sequence to have irqs for output setup Wrongly set-up interrupts also lead to broken hw-based load-detection on at least GM45, resulting in ghost VGA/TV-out outputs. To fix this, delay the hotplug register setup until after all outputs are set up, by moving it into a new dev_priv->display.hpd_irq_callback. We might also move the PCH_SPLIT platforms to such a setup eventually. Another funny part is that we need to delay the fbdev initial config probing until after the hpd regs are setup, for otherwise it'll detect ghost outputs. But we can only enable the hpd interrupt handling itself (and the output polling) _after_ that initial scan, due to massive locking brain-damage in the fbdev setup code. Add a big comment to explain this cute little dragon lair. v2: Encapsulate all the fbdev handling by wrapping the move call into intel_fbdev_initial_config in intel_fb.c. Requested by Chris Wilson. v3: Applied bikeshed from Jesse Barnes. v4: Imre Deak noticed that we also need to call intel_hpd_init after the drm_irqinstall calls in the gpu reset and resume paths - otherwise hotplug will be broken. Also improve the comment a bit about why hpd_init needs to be called before we set up the initial fbdev config. Bugzilla: Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=54943 Reported-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> (v3) Reviewed-by: Imre Deak <imre.deak@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-12-11 20:05:07 +07:00
/* Only enable hotplug handling once the fbdev is fully set up. */
intel_hpd_init(dev_priv);
drm/i915: Fixup hpd irq register setup ordering For GMCH platforms we set up the hpd irq registers in the irq postinstall hook. But since we only enable the irq sources we actually need in PORT_HOTPLUG_EN/STATUS, taking dev_priv->hotplug_supported_mask into account, no hpd interrupt sources is enabled since commit 52d7ecedac3f96fb562cb482c139015372728638 Author: Daniel Vetter <daniel.vetter@ffwll.ch> Date: Sat Dec 1 21:03:22 2012 +0100 drm/i915: reorder setup sequence to have irqs for output setup Wrongly set-up interrupts also lead to broken hw-based load-detection on at least GM45, resulting in ghost VGA/TV-out outputs. To fix this, delay the hotplug register setup until after all outputs are set up, by moving it into a new dev_priv->display.hpd_irq_callback. We might also move the PCH_SPLIT platforms to such a setup eventually. Another funny part is that we need to delay the fbdev initial config probing until after the hpd regs are setup, for otherwise it'll detect ghost outputs. But we can only enable the hpd interrupt handling itself (and the output polling) _after_ that initial scan, due to massive locking brain-damage in the fbdev setup code. Add a big comment to explain this cute little dragon lair. v2: Encapsulate all the fbdev handling by wrapping the move call into intel_fbdev_initial_config in intel_fb.c. Requested by Chris Wilson. v3: Applied bikeshed from Jesse Barnes. v4: Imre Deak noticed that we also need to call intel_hpd_init after the drm_irqinstall calls in the gpu reset and resume paths - otherwise hotplug will be broken. Also improve the comment a bit about why hpd_init needs to be called before we set up the initial fbdev config. Bugzilla: Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=54943 Reported-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> (v3) Reviewed-by: Imre Deak <imre.deak@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-12-11 20:05:07 +07:00
/*
* Some ports require correctly set-up hpd registers for detection to
* work properly (leading to ghost connected connector status), e.g. VGA
* on gm45. Hence we can only set up the initial fbdev config after hpd
* irqs are fully enabled. Now we should scan for the initial config
* only once hotplug handling is enabled, but due to screwed-up locking
* around kms/fbdev init we can't protect the fdbev initial config
* scanning against hotplug events. Hence do this first and ignore the
* tiny window where we will loose hotplug notifactions.
*/
intel_fbdev_initial_config_async(dev);
drm/i915: Fixup hpd irq register setup ordering For GMCH platforms we set up the hpd irq registers in the irq postinstall hook. But since we only enable the irq sources we actually need in PORT_HOTPLUG_EN/STATUS, taking dev_priv->hotplug_supported_mask into account, no hpd interrupt sources is enabled since commit 52d7ecedac3f96fb562cb482c139015372728638 Author: Daniel Vetter <daniel.vetter@ffwll.ch> Date: Sat Dec 1 21:03:22 2012 +0100 drm/i915: reorder setup sequence to have irqs for output setup Wrongly set-up interrupts also lead to broken hw-based load-detection on at least GM45, resulting in ghost VGA/TV-out outputs. To fix this, delay the hotplug register setup until after all outputs are set up, by moving it into a new dev_priv->display.hpd_irq_callback. We might also move the PCH_SPLIT platforms to such a setup eventually. Another funny part is that we need to delay the fbdev initial config probing until after the hpd regs are setup, for otherwise it'll detect ghost outputs. But we can only enable the hpd interrupt handling itself (and the output polling) _after_ that initial scan, due to massive locking brain-damage in the fbdev setup code. Add a big comment to explain this cute little dragon lair. v2: Encapsulate all the fbdev handling by wrapping the move call into intel_fbdev_initial_config in intel_fb.c. Requested by Chris Wilson. v3: Applied bikeshed from Jesse Barnes. v4: Imre Deak noticed that we also need to call intel_hpd_init after the drm_irqinstall calls in the gpu reset and resume paths - otherwise hotplug will be broken. Also improve the comment a bit about why hpd_init needs to be called before we set up the initial fbdev config. Bugzilla: Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=54943 Reported-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> (v3) Reviewed-by: Imre Deak <imre.deak@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-12-11 20:05:07 +07:00
drm_kms_helper_poll_init(dev);
return 0;
cleanup_gem:
mutex_lock(&dev->struct_mutex);
i915_gem_cleanup_engines(dev);
i915_gem_context_fini(dev);
mutex_unlock(&dev->struct_mutex);
cleanup_irq:
intel_guc_ucode_fini(dev);
drm_irq_uninstall(dev);
intel_teardown_gmbus(dev);
cleanup_csr:
intel_csr_ucode_fini(dev_priv);
vga_switcheroo_unregister_client(dev->pdev);
cleanup_vga_client:
vga_client_register(dev->pdev, NULL, NULL, NULL);
out:
return ret;
}
#if IS_ENABLED(CONFIG_FB)
static int i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv)
{
struct apertures_struct *ap;
struct pci_dev *pdev = dev_priv->dev->pdev;
bool primary;
int ret;
ap = alloc_apertures(1);
if (!ap)
return -ENOMEM;
ap->ranges[0].base = dev_priv->gtt.mappable_base;
ap->ranges[0].size = dev_priv->gtt.mappable_end;
primary =
pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
ret = remove_conflicting_framebuffers(ap, "inteldrmfb", primary);
kfree(ap);
return ret;
}
#else
static int i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv)
{
return 0;
}
#endif
#if !defined(CONFIG_VGA_CONSOLE)
static int i915_kick_out_vgacon(struct drm_i915_private *dev_priv)
{
return 0;
}
#elif !defined(CONFIG_DUMMY_CONSOLE)
static int i915_kick_out_vgacon(struct drm_i915_private *dev_priv)
{
return -ENODEV;
}
#else
static int i915_kick_out_vgacon(struct drm_i915_private *dev_priv)
{
int ret = 0;
DRM_INFO("Replacing VGA console driver\n");
console_lock();
if (con_is_bound(&vga_con))
ret = do_take_over_console(&dummy_con, 0, MAX_NR_CONSOLES - 1, 1);
if (ret == 0) {
ret = do_unregister_con_driver(&vga_con);
/* Ignore "already unregistered". */
if (ret == -ENODEV)
ret = 0;
}
console_unlock();
return ret;
}
#endif
static void i915_dump_device_info(struct drm_i915_private *dev_priv)
{
const struct intel_device_info *info = &dev_priv->info;
#define PRINT_S(name) "%s"
#define SEP_EMPTY
#define PRINT_FLAG(name) info->name ? #name "," : ""
#define SEP_COMMA ,
DRM_DEBUG_DRIVER("i915 device info: gen=%i, pciid=0x%04x rev=0x%02x flags="
DEV_INFO_FOR_EACH_FLAG(PRINT_S, SEP_EMPTY),
info->gen,
dev_priv->dev->pdev->device,
dev_priv->dev->pdev->revision,
DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG, SEP_COMMA));
#undef PRINT_S
#undef SEP_EMPTY
#undef PRINT_FLAG
#undef SEP_COMMA
}
static void cherryview_sseu_info_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_device_info *info;
u32 fuse, eu_dis;
info = (struct intel_device_info *)&dev_priv->info;
fuse = I915_READ(CHV_FUSE_GT);
info->slice_total = 1;
if (!(fuse & CHV_FGT_DISABLE_SS0)) {
info->subslice_per_slice++;
eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
CHV_FGT_EU_DIS_SS0_R1_MASK);
info->eu_total += 8 - hweight32(eu_dis);
}
if (!(fuse & CHV_FGT_DISABLE_SS1)) {
info->subslice_per_slice++;
eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
CHV_FGT_EU_DIS_SS1_R1_MASK);
info->eu_total += 8 - hweight32(eu_dis);
}
info->subslice_total = info->subslice_per_slice;
/*
* CHV expected to always have a uniform distribution of EU
* across subslices.
*/
info->eu_per_subslice = info->subslice_total ?
info->eu_total / info->subslice_total :
0;
/*
* CHV supports subslice power gating on devices with more than
* one subslice, and supports EU power gating on devices with
* more than one EU pair per subslice.
*/
info->has_slice_pg = 0;
info->has_subslice_pg = (info->subslice_total > 1);
info->has_eu_pg = (info->eu_per_subslice > 2);
}
static void gen9_sseu_info_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_device_info *info;
int s_max = 3, ss_max = 4, eu_max = 8;
int s, ss;
u32 fuse2, s_enable, ss_disable, eu_disable;
u8 eu_mask = 0xff;
info = (struct intel_device_info *)&dev_priv->info;
fuse2 = I915_READ(GEN8_FUSE2);
s_enable = (fuse2 & GEN8_F2_S_ENA_MASK) >>
GEN8_F2_S_ENA_SHIFT;
ss_disable = (fuse2 & GEN9_F2_SS_DIS_MASK) >>
GEN9_F2_SS_DIS_SHIFT;
info->slice_total = hweight32(s_enable);
/*
* The subslice disable field is global, i.e. it applies
* to each of the enabled slices.
*/
info->subslice_per_slice = ss_max - hweight32(ss_disable);
info->subslice_total = info->slice_total *
info->subslice_per_slice;
/*
* Iterate through enabled slices and subslices to
* count the total enabled EU.
*/
for (s = 0; s < s_max; s++) {
if (!(s_enable & (0x1 << s)))
/* skip disabled slice */
continue;
eu_disable = I915_READ(GEN9_EU_DISABLE(s));
for (ss = 0; ss < ss_max; ss++) {
int eu_per_ss;
if (ss_disable & (0x1 << ss))
/* skip disabled subslice */
continue;
eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
eu_mask);
/*
* Record which subslice(s) has(have) 7 EUs. we
* can tune the hash used to spread work among
* subslices if they are unbalanced.
*/
if (eu_per_ss == 7)
info->subslice_7eu[s] |= 1 << ss;
info->eu_total += eu_per_ss;
}
}
/*
* SKL is expected to always have a uniform distribution
* of EU across subslices with the exception that any one
* EU in any one subslice may be fused off for die
* recovery. BXT is expected to be perfectly uniform in EU
* distribution.
*/
info->eu_per_subslice = info->subslice_total ?
DIV_ROUND_UP(info->eu_total,
info->subslice_total) : 0;
/*
* SKL supports slice power gating on devices with more than
* one slice, and supports EU power gating on devices with
* more than one EU pair per subslice. BXT supports subslice
* power gating on devices with more than one subslice, and
* supports EU power gating on devices with more than one EU
* pair per subslice.
*/
info->has_slice_pg = ((IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) &&
(info->slice_total > 1));
info->has_subslice_pg = (IS_BROXTON(dev) && (info->subslice_total > 1));
info->has_eu_pg = (info->eu_per_subslice > 2);
}
static void broadwell_sseu_info_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_device_info *info;
const int s_max = 3, ss_max = 3, eu_max = 8;
int s, ss;
u32 fuse2, eu_disable[s_max], s_enable, ss_disable;
fuse2 = I915_READ(GEN8_FUSE2);
s_enable = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
ss_disable = (fuse2 & GEN8_F2_SS_DIS_MASK) >> GEN8_F2_SS_DIS_SHIFT;
eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK;
eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) |
((I915_READ(GEN8_EU_DISABLE1) & GEN8_EU_DIS1_S1_MASK) <<
(32 - GEN8_EU_DIS0_S1_SHIFT));
eu_disable[2] = (I915_READ(GEN8_EU_DISABLE1) >> GEN8_EU_DIS1_S2_SHIFT) |
((I915_READ(GEN8_EU_DISABLE2) & GEN8_EU_DIS2_S2_MASK) <<
(32 - GEN8_EU_DIS1_S2_SHIFT));
info = (struct intel_device_info *)&dev_priv->info;
info->slice_total = hweight32(s_enable);
/*
* The subslice disable field is global, i.e. it applies
* to each of the enabled slices.
*/
info->subslice_per_slice = ss_max - hweight32(ss_disable);
info->subslice_total = info->slice_total * info->subslice_per_slice;
/*
* Iterate through enabled slices and subslices to
* count the total enabled EU.
*/
for (s = 0; s < s_max; s++) {
if (!(s_enable & (0x1 << s)))
/* skip disabled slice */
continue;
for (ss = 0; ss < ss_max; ss++) {
u32 n_disabled;
if (ss_disable & (0x1 << ss))
/* skip disabled subslice */
continue;
n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
/*
* Record which subslices have 7 EUs.
*/
if (eu_max - n_disabled == 7)
info->subslice_7eu[s] |= 1 << ss;
info->eu_total += eu_max - n_disabled;
}
}
/*
* BDW is expected to always have a uniform distribution of EU across
* subslices with the exception that any one EU in any one subslice may
* be fused off for die recovery.
*/
info->eu_per_subslice = info->subslice_total ?
DIV_ROUND_UP(info->eu_total, info->subslice_total) : 0;
/*
* BDW supports slice power gating on devices with more than
* one slice.
*/
info->has_slice_pg = (info->slice_total > 1);
info->has_subslice_pg = 0;
info->has_eu_pg = 0;
}
/*
* Determine various intel_device_info fields at runtime.
*
* Use it when either:
* - it's judged too laborious to fill n static structures with the limit
* when a simple if statement does the job,
* - run-time checks (eg read fuse/strap registers) are needed.
*
* This function needs to be called:
* - after the MMIO has been setup as we are reading registers,
* - after the PCH has been detected,
* - before the first usage of the fields it can tweak.
*/
static void intel_device_info_runtime_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_device_info *info;
enum pipe pipe;
info = (struct intel_device_info *)&dev_priv->info;
/*
* Skylake and Broxton currently don't expose the topmost plane as its
* use is exclusive with the legacy cursor and we only want to expose
* one of those, not both. Until we can safely expose the topmost plane
* as a DRM_PLANE_TYPE_CURSOR with all the features exposed/supported,
* we don't expose the topmost plane at all to prevent ABI breakage
* down the line.
*/
if (IS_BROXTON(dev)) {
info->num_sprites[PIPE_A] = 2;
info->num_sprites[PIPE_B] = 2;
info->num_sprites[PIPE_C] = 1;
} else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
for_each_pipe(dev_priv, pipe)
info->num_sprites[pipe] = 2;
else
for_each_pipe(dev_priv, pipe)
info->num_sprites[pipe] = 1;
if (i915.disable_display) {
DRM_INFO("Display disabled (module parameter)\n");
info->num_pipes = 0;
} else if (info->num_pipes > 0 &&
(INTEL_INFO(dev)->gen == 7 || INTEL_INFO(dev)->gen == 8) &&
HAS_PCH_SPLIT(dev)) {
u32 fuse_strap = I915_READ(FUSE_STRAP);
u32 sfuse_strap = I915_READ(SFUSE_STRAP);
/*
* SFUSE_STRAP is supposed to have a bit signalling the display
* is fused off. Unfortunately it seems that, at least in
* certain cases, fused off display means that PCH display
* reads don't land anywhere. In that case, we read 0s.
*
* On CPT/PPT, we can detect this case as SFUSE_STRAP_FUSE_LOCK
* should be set when taking over after the firmware.
*/
if (fuse_strap & ILK_INTERNAL_DISPLAY_DISABLE ||
sfuse_strap & SFUSE_STRAP_DISPLAY_DISABLED ||
(dev_priv->pch_type == PCH_CPT &&
!(sfuse_strap & SFUSE_STRAP_FUSE_LOCK))) {
DRM_INFO("Display fused off, disabling\n");
info->num_pipes = 0;
} else if (fuse_strap & IVB_PIPE_C_DISABLE) {
DRM_INFO("PipeC fused off\n");
info->num_pipes -= 1;
}
} else if (info->num_pipes > 0 && INTEL_INFO(dev)->gen == 9) {
u32 dfsm = I915_READ(SKL_DFSM);
u8 disabled_mask = 0;
bool invalid;
int num_bits;
if (dfsm & SKL_DFSM_PIPE_A_DISABLE)
disabled_mask |= BIT(PIPE_A);
if (dfsm & SKL_DFSM_PIPE_B_DISABLE)
disabled_mask |= BIT(PIPE_B);
if (dfsm & SKL_DFSM_PIPE_C_DISABLE)
disabled_mask |= BIT(PIPE_C);
num_bits = hweight8(disabled_mask);
switch (disabled_mask) {
case BIT(PIPE_A):
case BIT(PIPE_B):
case BIT(PIPE_A) | BIT(PIPE_B):
case BIT(PIPE_A) | BIT(PIPE_C):
invalid = true;
break;
default:
invalid = false;
}
if (num_bits > info->num_pipes || invalid)
DRM_ERROR("invalid pipe fuse configuration: 0x%x\n",
disabled_mask);
else
info->num_pipes -= num_bits;
}
/* Initialize slice/subslice/EU info */
if (IS_CHERRYVIEW(dev))
cherryview_sseu_info_init(dev);
else if (IS_BROADWELL(dev))
broadwell_sseu_info_init(dev);
else if (INTEL_INFO(dev)->gen >= 9)
gen9_sseu_info_init(dev);
/* Snooping is broken on BXT A stepping. */
info->has_snoop = !info->has_llc;
info->has_snoop &= !IS_BXT_REVID(dev, 0, BXT_REVID_A1);
DRM_DEBUG_DRIVER("slice total: %u\n", info->slice_total);
DRM_DEBUG_DRIVER("subslice total: %u\n", info->subslice_total);
DRM_DEBUG_DRIVER("subslice per slice: %u\n", info->subslice_per_slice);
DRM_DEBUG_DRIVER("EU total: %u\n", info->eu_total);
DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->eu_per_subslice);
DRM_DEBUG_DRIVER("has slice power gating: %s\n",
info->has_slice_pg ? "y" : "n");
DRM_DEBUG_DRIVER("has subslice power gating: %s\n",
info->has_subslice_pg ? "y" : "n");
DRM_DEBUG_DRIVER("has EU power gating: %s\n",
info->has_eu_pg ? "y" : "n");
}
static void intel_init_dpio(struct drm_i915_private *dev_priv)
{
/*
* IOSF_PORT_DPIO is used for VLV x2 PHY (DP/HDMI B and C),
* CHV x1 PHY (DP/HDMI D)
* IOSF_PORT_DPIO_2 is used for CHV x2 PHY (DP/HDMI B and C)
*/
if (IS_CHERRYVIEW(dev_priv)) {
DPIO_PHY_IOSF_PORT(DPIO_PHY0) = IOSF_PORT_DPIO_2;
DPIO_PHY_IOSF_PORT(DPIO_PHY1) = IOSF_PORT_DPIO;
} else if (IS_VALLEYVIEW(dev_priv)) {
DPIO_PHY_IOSF_PORT(DPIO_PHY0) = IOSF_PORT_DPIO;
}
}
static int i915_workqueues_init(struct drm_i915_private *dev_priv)
{
/*
* The i915 workqueue is primarily used for batched retirement of
* requests (and thus managing bo) once the task has been completed
* by the GPU. i915_gem_retire_requests() is called directly when we
* need high-priority retirement, such as waiting for an explicit
* bo.
*
* It is also used for periodic low-priority events, such as
* idle-timers and recording error state.
*
* All tasks on the workqueue are expected to acquire the dev mutex
* so there is no point in running more than one instance of the
* workqueue at any time. Use an ordered one.
*/
dev_priv->wq = alloc_ordered_workqueue("i915", 0);
if (dev_priv->wq == NULL)
goto out_err;
dev_priv->hotplug.dp_wq = alloc_ordered_workqueue("i915-dp", 0);
if (dev_priv->hotplug.dp_wq == NULL)
goto out_free_wq;
dev_priv->gpu_error.hangcheck_wq =
alloc_ordered_workqueue("i915-hangcheck", 0);
if (dev_priv->gpu_error.hangcheck_wq == NULL)
goto out_free_dp_wq;
return 0;
out_free_dp_wq:
destroy_workqueue(dev_priv->hotplug.dp_wq);
out_free_wq:
destroy_workqueue(dev_priv->wq);
out_err:
DRM_ERROR("Failed to allocate workqueues.\n");
return -ENOMEM;
}
static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
{
destroy_workqueue(dev_priv->gpu_error.hangcheck_wq);
destroy_workqueue(dev_priv->hotplug.dp_wq);
destroy_workqueue(dev_priv->wq);
}
static int i915_mmio_setup(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
int mmio_bar;
int mmio_size;
mmio_bar = IS_GEN2(dev) ? 1 : 0;
/*
* Before gen4, the registers and the GTT are behind different BARs.
* However, from gen4 onwards, the registers and the GTT are shared
* in the same BAR, so we want to restrict this ioremap from
* clobbering the GTT which we want ioremap_wc instead. Fortunately,
* the register BAR remains the same size for all the earlier
* generations up to Ironlake.
*/
if (INTEL_INFO(dev)->gen < 5)
mmio_size = 512 * 1024;
else
mmio_size = 2 * 1024 * 1024;
dev_priv->regs = pci_iomap(dev->pdev, mmio_bar, mmio_size);
if (dev_priv->regs == NULL) {
DRM_ERROR("failed to map registers\n");
return -EIO;
}
/* Try to make sure MCHBAR is enabled before poking at it */
intel_setup_mchbar(dev);
return 0;
}
static void i915_mmio_cleanup(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
intel_teardown_mchbar(dev);
pci_iounmap(dev->pdev, dev_priv->regs);
}
/**
* i915_driver_load - setup chip and create an initial config
* @dev: DRM device
* @flags: startup flags
*
* The driver load routine has to do several things:
* - drive output discovery via intel_modeset_init()
* - initialize the memory manager
* - allocate initial config memory
* - setup the DRM framebuffer with the allocated memory
*/
int i915_driver_load(struct drm_device *dev, unsigned long flags)
{
struct drm_i915_private *dev_priv;
struct intel_device_info *info, *device_info;
int ret = 0;
uint32_t aperture_size;
info = (struct intel_device_info *) flags;
dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL);
if (dev_priv == NULL)
return -ENOMEM;
dev->dev_private = dev_priv;
dev_priv->dev = dev;
/* Setup the write-once "constant" device info */
device_info = (struct intel_device_info *)&dev_priv->info;
memcpy(device_info, info, sizeof(dev_priv->info));
device_info->device_id = dev->pdev->device;
drm/i915: fix long-standing SNB regression in power consumption after resume v2 This patch fixes regression in power consumtion of sandy bridge gpu, which exists since v3.6 Sometimes after resuming from s2ram gpu starts thinking that it's extremely busy. After that it never reaches rc6 state. Bug exists since kernel v3.6: commit b4ae3f22d238617ca11610b29fde16cf8c0bc6e0 Author: Jesse Barnes <jbarnes@virtuousgeek.org> Date: Thu Jun 14 11:04:48 2012 -0700 drm/i915: load boot context at driver init time For some reason RC6 is already enabled at the beginning of resuming process. Following initliaztion breaks some internal state and confuses RPS engine. This patch disables RC6 at the beginnig of resume and initialization. I've rearranged initialization sequence, because intel_disable_gt_powersave() needs initialized force_wake_get/put and some locks from the dev_priv. Note: The culprit in the initialization sequence seems to be the write to MBCTL added in the above mentioned commit. The first version of this patch just held a forcewake reference across the clock gating init functions, which seems to have been enought to gather quite a few positive test reports. But since that smelled a bit like ad-hoc duct-tape v2 now just disables rps/rc6 across the entire hw setup. References: https://bugs.freedesktop.org/show_bug.cgi?id=54089 References: https://bugzilla.kernel.org/show_bug.cgi?id=58971 References: https://patchwork.kernel.org/patch/2827634/ (patch v1) Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jesse Barnes <jbarnes@virtuousgeek.org> [danvet: Add note about v1 vs. v2 of this patch and use standard layout for the commit citation. Also add the tested-bys from v1 and a cc: stable.] Cc: stable@vger.kernel.org (Note: tiny conflict due to the addition of the backlight lock in 3.11) Tested-by: Alexander Kaltsas <alexkaltsas@gmail.com> (v1) Tested-by: rocko <rockorequin@hotmail.com> (v1) Tested-by: JohnMB <johnmbryant@sky.com> (v1) Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-07-17 13:22:58 +07:00
spin_lock_init(&dev_priv->irq_lock);
spin_lock_init(&dev_priv->gpu_error.lock);
mutex_init(&dev_priv->backlight_lock);
spin_lock_init(&dev_priv->uncore.lock);
spin_lock_init(&dev_priv->mm.object_stat_lock);
drm/i915: Replaced Blitter ring based flips with MMIO flips This patch enables the framework for using MMIO based flip calls, in contrast with the CS based flip calls which are being used currently. MMIO based flip calls can be enabled on architectures where Render and Blitter engines reside in different power wells. The decision to use MMIO flips can be made based on workloads to give 100% residency for Media power well. v2: The MMIO flips now use the interrupt driven mechanism for issuing the flips when target seqno is reached. (Incorporating Ville's idea) v3: Rebasing on latest code. Code restructuring after incorporating Damien's comments v4: Addressing Ville's review comments -general cleanup -updating only base addr instead of calling update_primary_plane -extending patch for gen5+ platforms v5: Addressed Ville's review comments -Making mmio flip vs cs flip selection based on module parameter -Adding check for DRIVER_MODESET feature in notify_ring before calling notify mmio flip. -Other changes mostly in function arguments v6: -Having a seperate function to check condition for using mmio flips (Ville) -propogating error code from i915_gem_check_olr (Ville) v7: -Adding __must_check with i915_gem_check_olr (Chris) -Renaming mmio_flip_data to mmio_flip (Chris) -Rebasing on latest nightly v8: -Rebasing on latest code -squash 3rd patch in series(mmio setbase vs page flip race) with this patch -Added new tiling mode update in intel_do_mmio_flip (Chris) v9: -check for obj->last_write_seqno being 0 instead of obj->ring being NULL in intel_postpone_flip, as this is a more restrictive condition (Chris) v10: -Applied Chris's suggestions for squashing patches 2,3 into this patch. These patches make the selection of CS vs MMIO flip at the page flip time, and make the module parameter for using mmio flips as tristate, the states being 'force CS flips', 'force mmio flips', 'driver discretion'. Changed the logic for driver discretion (Chris) v11: Minor code cleanup(better readability, fixing whitespace errors, using lockdep to check mutex locked status in postpone_flip, removal of __must_check in function definition) (Chris) Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Sourab Gupta <sourab.gupta@intel.com> Signed-off-by: Akash Goel <akash.goel@intel.com> Tested-by: Chris Wilson <chris@chris-wilson.co.uk> # snb, ivb [danvet: Fix up parameter alignement checkpatch spotted.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-06-02 18:17:17 +07:00
spin_lock_init(&dev_priv->mmio_flip_lock);
mutex_init(&dev_priv->sb_lock);
drm/i915: fix long-standing SNB regression in power consumption after resume v2 This patch fixes regression in power consumtion of sandy bridge gpu, which exists since v3.6 Sometimes after resuming from s2ram gpu starts thinking that it's extremely busy. After that it never reaches rc6 state. Bug exists since kernel v3.6: commit b4ae3f22d238617ca11610b29fde16cf8c0bc6e0 Author: Jesse Barnes <jbarnes@virtuousgeek.org> Date: Thu Jun 14 11:04:48 2012 -0700 drm/i915: load boot context at driver init time For some reason RC6 is already enabled at the beginning of resuming process. Following initliaztion breaks some internal state and confuses RPS engine. This patch disables RC6 at the beginnig of resume and initialization. I've rearranged initialization sequence, because intel_disable_gt_powersave() needs initialized force_wake_get/put and some locks from the dev_priv. Note: The culprit in the initialization sequence seems to be the write to MBCTL added in the above mentioned commit. The first version of this patch just held a forcewake reference across the clock gating init functions, which seems to have been enought to gather quite a few positive test reports. But since that smelled a bit like ad-hoc duct-tape v2 now just disables rps/rc6 across the entire hw setup. References: https://bugs.freedesktop.org/show_bug.cgi?id=54089 References: https://bugzilla.kernel.org/show_bug.cgi?id=58971 References: https://patchwork.kernel.org/patch/2827634/ (patch v1) Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jesse Barnes <jbarnes@virtuousgeek.org> [danvet: Add note about v1 vs. v2 of this patch and use standard layout for the commit citation. Also add the tested-bys from v1 and a cc: stable.] Cc: stable@vger.kernel.org (Note: tiny conflict due to the addition of the backlight lock in 3.11) Tested-by: Alexander Kaltsas <alexkaltsas@gmail.com> (v1) Tested-by: rocko <rockorequin@hotmail.com> (v1) Tested-by: JohnMB <johnmbryant@sky.com> (v1) Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-07-17 13:22:58 +07:00
mutex_init(&dev_priv->modeset_restore_lock);
mutex_init(&dev_priv->av_mutex);
drm/i915: Add two-stage ILK-style watermark programming (v11) In addition to calculating final watermarks, let's also pre-calculate a set of intermediate watermark values at atomic check time. These intermediate watermarks are a combination of the watermarks for the old state and the new state; they should satisfy the requirements of both states which means they can be programmed immediately when we commit the atomic state (without waiting for a vblank). Once the vblank does happen, we can then re-program watermarks to the more optimal final value. v2: Significant rebasing/rewriting. v3: - Move 'need_postvbl_update' flag to CRTC state (Daniel) - Don't forget to check intermediate watermark values for validity (Maarten) - Don't due async watermark optimization; just do it at the end of the atomic transaction, after waiting for vblanks. We do want it to be async eventually, but adding that now will cause more trouble for Maarten's in-progress work. (Maarten) - Don't allocate space in crtc_state for intermediate watermarks on platforms that don't need it (gen9+). - Move WaCxSRDisabledForSpriteScaling:ivb into intel_begin_crtc_commit now that ilk_update_wm is gone. v4: - Add a wm_mutex to cover updates to intel_crtc->active and the need_postvbl_update flag. Since we don't have async yet it isn't terribly important yet, but might as well add it now. - Change interface to program watermarks. Platforms will now expose .initial_watermarks() and .optimize_watermarks() functions to do watermark programming. These should lock wm_mutex, copy the appropriate state values into intel_crtc->active, and then call the internal program watermarks function. v5: - Skip intermediate watermark calculation/check during initial hardware readout since we don't trust the existing HW values (and don't have valid values of our own yet). - Don't try to call .optimize_watermarks() on platforms that don't have atomic watermarks yet. (Maarten) v6: - Rebase v7: - Further rebase v8: - A few minor indentation and line length fixes v9: - Yet another rebase since Maarten's patches reworked a bunch of the code (wm_pre, wm_post, etc.) that this was previously based on. v10: - Move wm_mutex to dev_priv to protect against racing commits against disjoint CRTC sets. (Maarten) - Drop unnecessary clearing of cstate->wm.need_postvbl_update (Maarten) v11: - Now that we've moved to atomic watermark updates, make sure we call the proper function to program watermarks in {ironlake,haswell}_crtc_enable(); the failure to do so on the previous patch iteration led to us not actually programming the watermarks before turning on the CRTC, which was the cause of the underruns that the CI system was seeing. - Fix inverted logic for determining when to optimize watermarks. We were needlessly optimizing when the intermediate/optimal values were the same (harmless), but not actually optimizing when they differed (also harmless, but wasteful from a power/bandwidth perspective). Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1456276813-5689-1-git-send-email-matthew.d.roper@intel.com
2016-02-24 08:20:13 +07:00
mutex_init(&dev_priv->wm.wm_mutex);
mutex_init(&dev_priv->pps_mutex);
drm/i915: fix long-standing SNB regression in power consumption after resume v2 This patch fixes regression in power consumtion of sandy bridge gpu, which exists since v3.6 Sometimes after resuming from s2ram gpu starts thinking that it's extremely busy. After that it never reaches rc6 state. Bug exists since kernel v3.6: commit b4ae3f22d238617ca11610b29fde16cf8c0bc6e0 Author: Jesse Barnes <jbarnes@virtuousgeek.org> Date: Thu Jun 14 11:04:48 2012 -0700 drm/i915: load boot context at driver init time For some reason RC6 is already enabled at the beginning of resuming process. Following initliaztion breaks some internal state and confuses RPS engine. This patch disables RC6 at the beginnig of resume and initialization. I've rearranged initialization sequence, because intel_disable_gt_powersave() needs initialized force_wake_get/put and some locks from the dev_priv. Note: The culprit in the initialization sequence seems to be the write to MBCTL added in the above mentioned commit. The first version of this patch just held a forcewake reference across the clock gating init functions, which seems to have been enought to gather quite a few positive test reports. But since that smelled a bit like ad-hoc duct-tape v2 now just disables rps/rc6 across the entire hw setup. References: https://bugs.freedesktop.org/show_bug.cgi?id=54089 References: https://bugzilla.kernel.org/show_bug.cgi?id=58971 References: https://patchwork.kernel.org/patch/2827634/ (patch v1) Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Jesse Barnes <jbarnes@virtuousgeek.org> [danvet: Add note about v1 vs. v2 of this patch and use standard layout for the commit citation. Also add the tested-bys from v1 and a cc: stable.] Cc: stable@vger.kernel.org (Note: tiny conflict due to the addition of the backlight lock in 3.11) Tested-by: Alexander Kaltsas <alexkaltsas@gmail.com> (v1) Tested-by: rocko <rockorequin@hotmail.com> (v1) Tested-by: JohnMB <johnmbryant@sky.com> (v1) Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-07-17 13:22:58 +07:00
ret = i915_workqueues_init(dev_priv);
if (ret < 0)
goto out_free_priv;
/* This must be called before any calls to HAS_PCH_* */
intel_detect_pch(dev);
intel_pm_setup(dev);
intel_init_dpio(dev_priv);
intel_power_domains_init(dev_priv);
intel_irq_init(dev_priv);
intel_init_display_hooks(dev_priv);
intel_init_clock_gating_hooks(dev_priv);
intel_init_audio_hooks(dev_priv);
drm/i915: allow package C8+ states on Haswell (disabled) This patch allows PC8+ states on Haswell. These states can only be reached when all the display outputs are disabled, and they allow some more power savings. The fact that the graphics device is allowing PC8+ doesn't mean that the machine will actually enter PC8+: all the other devices also need to allow PC8+. For now this option is disabled by default. You need i915.allow_pc8=1 if you want it. This patch adds a big comment inside i915_drv.h explaining how it works and how it tracks things. Read it. v2: (this is not really v2, many previous versions were already sent, but they had different names) - Use the new functions to enable/disable GTIMR and GEN6_PMIMR - Rename almost all variables and functions to names suggested by Chris - More WARNs on the IRQ handling code - Also disable PC8 when there's GPU work to do (thanks to Ben for the help on this), so apps can run caster - Enable PC8 on a delayed work function that is delayed for 5 seconds. This makes sure we only enable PC8+ if we're really idle - Make sure we're not in PC8+ when suspending v3: - WARN if IRQs are disabled on __wait_seqno - Replace some DRM_ERRORs with WARNs - Fix calls to restore GT and PM interrupts - Use intel_mark_busy instead of intel_ring_advance to disable PC8 v4: - Use the force_wake, Luke! v5: - Remove the "IIR is not zero" WARNs - Move the force_wake chunk to its own patch - Only restore what's missing from RC6, not everything Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-08-19 23:18:09 +07:00
drm/i915: add support for checking if we hold an RPM reference Atm, we assert that the device is not suspended until the point when the device is truly put to a suspended state. This is fine, but we can catch more problems if we check that RPM refcount is non-zero. After that one drops to zero we shouldn't access the device any more, even if the actual device suspend may be delayed. Change assert_rpm_wakelock_held() accordingly to check for a non-zero RPM refcount in addition to the current device-not-suspended check. For the new asserts to work we need to annotate every place explicitly in the code where we expect that the device is powered. The places where we only assume this, but may not hold an RPM reference: - driver load We assume the device to be powered until we enable RPM. Make this explicit by taking an RPM reference around the load function. - system and runtime sudpend/resume handlers These handlers are called when the RPM reference becomes 0 and know the exact point after which the device can get powered off. Disable the RPM-reference-held check for their duration. - the IRQ, hangcheck and RPS work handlers These handlers are flushed in the system/runtime suspend handler before the device is powered off, so it's guaranteed that they won't run while the device is powered off even though they don't hold any RPM reference. Disable the RPM-reference-held check for their duration. In all these cases we still check that the device is not suspended. These explicit annotations also have the positive side effect of documenting our assumptions better. This caught additional WARNs from the atomic modeset path, those should be fixed separately. v2: - remove the redundant HAS_RUNTIME_PM check (moved to patch 1) (Ville) v3: - use a new dedicated RPM wakelock refcount to also catch cases where our own RPM get/put functions were not called (Chris) - assert also that the new RPM wakelock refcount is 0 in the RPM suspend handler (Chris) - change the assert error message to be more meaningful (Chris) - prevent false assert errors and check that the RPM wakelock is 0 in the RPM resume handler too - prevent false assert errors in the hangcheck work too - add a device not suspended assert check to the hangcheck work v4: - rename disable/enable_rpm_asserts to disable/enable_rpm_wakeref_asserts and wakelock_count to wakeref_count - disable the wakeref asserts in the IRQ handlers and RPS work too - update/clarify commit message v5: - mark places we plan to change to use proper RPM refcounting with separate DISABLE/ENABLE_RPM_WAKEREF_ASSERTS aliases (Chris) Signed-off-by: Imre Deak <imre.deak@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: http://patchwork.freedesktop.org/patch/msgid/1450227139-13471-1-git-send-email-imre.deak@intel.com
2015-12-16 07:52:19 +07:00
intel_runtime_pm_get(dev_priv);
intel_display_crc_init(dev);
i915_dump_device_info(dev_priv);
/* Not all pre-production machines fall into this category, only the
* very first ones. Almost everything should work, except for maybe
* suspend/resume. And we don't implement workarounds that affect only
* pre-production machines. */
if (IS_HSW_EARLY_SDV(dev))
DRM_INFO("This is an early pre-production Haswell machine. "
"It may not be fully functional.\n");
if (i915_get_bridge_dev(dev)) {
ret = -EIO;
goto out_runtime_pm_put;
}
ret = i915_mmio_setup(dev);
if (ret < 0)
goto put_bridge;
intel_uncore_init(dev);
drm/i915: Stop using AGP layer for GEN6+ As a quick hack we make the old intel_gtt structure mutable so we can fool a bunch of the existing code which depends on elements in that data structure. We can/should try to remove this in a subsequent patch. This should preserve the old gtt init behavior which upon writing these patches seems incorrect. The next patch will fix these things. The one exception is VLV which doesn't have the preserved flush control write behavior. Since we want to do that for all GEN6+ stuff, we'll handle that in a later patch. Mainstream VLV support doesn't actually exist yet anyway. v2: Update the comment to remove the "voodoo" Check that the last pte written matches what we readback v3: actually kill cache_level_to_agp_type since most of the flags will disappear in an upcoming patch v4: v3 was actually not what we wanted (Daniel) Make the ggtt bind assertions better and stricter (Chris) Fix some uncaught errors at gtt init (Chris) Some other random stuff that Chris wanted v5: check for i==0 in gen6_ggtt_bind_object to shut up gcc (Ben) Signed-off-by: Ben Widawsky <ben@bwidawsk.net> Reviewed-by [v4]: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Make the cache_level -> agp_flags conversion for pre-gen6 a tad more robust by mapping everything != CACHE_NONE to the cached agp flag - we have a 1:1 uncached mapping, but different modes of cacheable (at least on later generations). Suggested by Chris Wilson.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-11-05 00:21:27 +07:00
ret = i915_gem_gtt_init(dev);
if (ret)
goto out_uncore_fini;
/* WARNING: Apparently we must kick fbdev drivers before vgacon,
* otherwise the vga fbdev driver falls over. */
ret = i915_kick_out_firmware_fb(dev_priv);
if (ret) {
DRM_ERROR("failed to remove conflicting framebuffer drivers\n");
goto out_gtt;
}
ret = i915_kick_out_vgacon(dev_priv);
if (ret) {
DRM_ERROR("failed to remove conflicting VGA console\n");
goto out_gtt;
}
pci_set_master(dev->pdev);
/* overlay on gen2 is broken and can't address above 1G */
if (IS_GEN2(dev))
dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(30));
/* 965GM sometimes incorrectly writes to hardware status page (HWS)
* using 32bit addressing, overwriting memory if HWS is located
* above 4GB.
*
* The documentation also mentions an issue with undefined
* behaviour if any general state is accessed within a page above 4GB,
* which also needs to be handled carefully.
*/
if (IS_BROADWATER(dev) || IS_CRESTLINE(dev))
dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(32));
aperture_size = dev_priv->gtt.mappable_end;
dev_priv->gtt.mappable =
io_mapping_create_wc(dev_priv->gtt.mappable_base,
aperture_size);
if (dev_priv->gtt.mappable == NULL) {
ret = -EIO;
goto out_gtt;
}
dev_priv->gtt.mtrr = arch_phys_wc_add(dev_priv->gtt.mappable_base,
aperture_size);
pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY,
PM_QOS_DEFAULT_VALUE);
intel_uncore_sanitize(dev);
intel_opregion_setup(dev);
i915_gem_load_init(dev);
i915_gem_shrinker_init(dev_priv);
/* On the 945G/GM, the chipset reports the MSI capability on the
* integrated graphics even though the support isn't actually there
* according to the published specs. It doesn't appear to function
* correctly in testing on 945G.
* This may be a side effect of MSI having been made available for PEG
* and the registers being closely associated.
*
* According to chipset errata, on the 965GM, MSI interrupts may
* be lost or delayed, but we use them anyways to avoid
* stuck interrupts on some machines.
*/
if (!IS_I945G(dev) && !IS_I945GM(dev)) {
if (pci_enable_msi(dev->pdev) < 0)
DRM_DEBUG_DRIVER("can't enable MSI");
}
intel_device_info_runtime_init(dev);
if (INTEL_INFO(dev)->num_pipes) {
ret = drm_vblank_init(dev, INTEL_INFO(dev)->num_pipes);
if (ret)
goto out_gem_unload;
}
ret = i915_load_modeset_init(dev);
if (ret < 0) {
DRM_ERROR("failed to init modeset\n");
goto out_power_well;
}
/*
* Notify a valid surface after modesetting,
* when running inside a VM.
*/
if (intel_vgpu_active(dev))
I915_WRITE(vgtif_reg(display_ready), VGT_DRV_DISPLAY_READY);
drm/i915: rc6 in sysfs Merge rc6 information into the power group for our device. Until now the i915 driver has not had any sysfs entries (aside from the connector stuff enabled by drm core). Since it seems like we're likely to have more in the future I created a new file for sysfs stubs, as well as the rc6 sysfs functions which don't really belong elsewhere (perhaps i915_suspend, but most of the stuff is in intel_display,c). displays rc6 modes enabled (as a hex mask): cat /sys/class/drm/card0/power/rc6_enable displays #ms GPU has been in rc6 since boot: cat /sys/class/drm/card0/power/rc6_residency_ms displays #ms GPU has been in deep rc6 since boot: cat /sys/class/drm/card0/power/rc6p_residency_ms displays #ms GPU has been in deepest rc6 since boot: cat /sys/class/drm/card0/power/rc6pp_residency_ms Important note: I've seen on SNB that even when RC6 is *not* enabled the rc6 register seems to have a random value in it. I can only guess at the reason reason for this. Those writing tools that utilize this value need to be careful and probably want to scrutinize the value very carefully. v2: use common rc6 residency units to milliseconds for the other RC6 types v3: don't create sysfs files for GEN <= 5 add a rc6_enable to show a mask of enabled rc6 types use unmerge instead of remove for sysfs group squash intel_enable_rc6() extraction into this patch v4: rename sysfs files (Chris) CC: Chris Wilson <chris@chris-wilson.co.uk> CC: Daniel Vetter <daniel.vetter@ffwll.ch>f CC: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Ben Widawsky <benjamin.widawsky@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: squash in the 64bit division fix by Chris Wilson.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-04-11 11:17:01 +07:00
i915_setup_sysfs(dev);
if (INTEL_INFO(dev)->num_pipes) {
/* Must be done after probing outputs */
intel_opregion_init(dev);
Revert "ACPI / video / i915: No ACPI backlight if firmware expects Windows 8" We attempted to address a regression introduced by commit a57f7f9 (ACPICA: Add Windows8/Server2012 string for _OSI method.) after which ACPI video backlight support doesn't work on a number of systems, because the relevant AML methods in the ACPI tables in their BIOSes become useless after the BIOS has been told that the OS is compatible with Windows 8. That problem is tracked by the bug entry at: https://bugzilla.kernel.org/show_bug.cgi?id=51231 Commit 8c5bd7a (ACPI / video / i915: No ACPI backlight if firmware expects Windows 8) introduced for this purpose essentially prevented the ACPI backlight support from being used if the BIOS had been told that the OS was compatible with Windows 8 and the i915 driver was loaded, in which case the backlight would always be handled by i915. Unfortunately, however, that turned out to cause problems with backlight to appear on multiple systems with symptoms indicating that i915 was unable to control the backlight on those systems as expected. For this reason, revert commit 8c5bd7a, but leave the function acpi_video_backlight_quirks() introduced by it, because another commit on top of it uses that function. References: https://lkml.org/lkml/2013/7/21/119 References: https://lkml.org/lkml/2013/7/22/261 References: https://lkml.org/lkml/2013/7/23/429 References: https://lkml.org/lkml/2013/7/23/459 References: https://lkml.org/lkml/2013/7/23/81 References: https://lkml.org/lkml/2013/7/24/27 Reported-and-tested-by: James Hogan <james@albanarts.com> Reported-and-tested-by: Kamal Mostafa <kamal@canonical.com> Reported-and-tested-by: Jörg Otte <jrg.otte@gmail.com> Reported-and-tested-by: Steven Newbury <steve@snewbury.org.uk> Reported-by: Martin Steigerwald <Martin@lichtvoll.de> Reported-by: Kalle Valo <kvalo@adurom.com> Tested-by: Joerg Platte <jplatte@naasa.net> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2013-07-26 02:43:39 +07:00
acpi_video_register();
}
if (IS_GEN5(dev))
intel_gpu_ips_init(dev_priv);
intel_runtime_pm_enable(dev_priv);
i915_audio_component_init(dev_priv);
drm/i915: add support for checking if we hold an RPM reference Atm, we assert that the device is not suspended until the point when the device is truly put to a suspended state. This is fine, but we can catch more problems if we check that RPM refcount is non-zero. After that one drops to zero we shouldn't access the device any more, even if the actual device suspend may be delayed. Change assert_rpm_wakelock_held() accordingly to check for a non-zero RPM refcount in addition to the current device-not-suspended check. For the new asserts to work we need to annotate every place explicitly in the code where we expect that the device is powered. The places where we only assume this, but may not hold an RPM reference: - driver load We assume the device to be powered until we enable RPM. Make this explicit by taking an RPM reference around the load function. - system and runtime sudpend/resume handlers These handlers are called when the RPM reference becomes 0 and know the exact point after which the device can get powered off. Disable the RPM-reference-held check for their duration. - the IRQ, hangcheck and RPS work handlers These handlers are flushed in the system/runtime suspend handler before the device is powered off, so it's guaranteed that they won't run while the device is powered off even though they don't hold any RPM reference. Disable the RPM-reference-held check for their duration. In all these cases we still check that the device is not suspended. These explicit annotations also have the positive side effect of documenting our assumptions better. This caught additional WARNs from the atomic modeset path, those should be fixed separately. v2: - remove the redundant HAS_RUNTIME_PM check (moved to patch 1) (Ville) v3: - use a new dedicated RPM wakelock refcount to also catch cases where our own RPM get/put functions were not called (Chris) - assert also that the new RPM wakelock refcount is 0 in the RPM suspend handler (Chris) - change the assert error message to be more meaningful (Chris) - prevent false assert errors and check that the RPM wakelock is 0 in the RPM resume handler too - prevent false assert errors in the hangcheck work too - add a device not suspended assert check to the hangcheck work v4: - rename disable/enable_rpm_asserts to disable/enable_rpm_wakeref_asserts and wakelock_count to wakeref_count - disable the wakeref asserts in the IRQ handlers and RPS work too - update/clarify commit message v5: - mark places we plan to change to use proper RPM refcounting with separate DISABLE/ENABLE_RPM_WAKEREF_ASSERTS aliases (Chris) Signed-off-by: Imre Deak <imre.deak@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: http://patchwork.freedesktop.org/patch/msgid/1450227139-13471-1-git-send-email-imre.deak@intel.com
2015-12-16 07:52:19 +07:00
intel_runtime_pm_put(dev_priv);
return 0;
out_power_well:
intel_power_domains_fini(dev_priv);
drm_vblank_cleanup(dev);
out_gem_unload:
i915_gem_shrinker_cleanup(dev_priv);
if (dev->pdev->msi_enabled)
pci_disable_msi(dev->pdev);
pm_qos_remove_request(&dev_priv->pm_qos);
arch_phys_wc_del(dev_priv->gtt.mtrr);
io_mapping_free(dev_priv->gtt.mappable);
out_gtt:
i915_global_gtt_cleanup(dev);
out_uncore_fini:
intel_uncore_fini(dev);
i915_mmio_cleanup(dev);
put_bridge:
pci_dev_put(dev_priv->bridge_dev);
i915_gem_load_cleanup(dev);
out_runtime_pm_put:
drm/i915: add support for checking if we hold an RPM reference Atm, we assert that the device is not suspended until the point when the device is truly put to a suspended state. This is fine, but we can catch more problems if we check that RPM refcount is non-zero. After that one drops to zero we shouldn't access the device any more, even if the actual device suspend may be delayed. Change assert_rpm_wakelock_held() accordingly to check for a non-zero RPM refcount in addition to the current device-not-suspended check. For the new asserts to work we need to annotate every place explicitly in the code where we expect that the device is powered. The places where we only assume this, but may not hold an RPM reference: - driver load We assume the device to be powered until we enable RPM. Make this explicit by taking an RPM reference around the load function. - system and runtime sudpend/resume handlers These handlers are called when the RPM reference becomes 0 and know the exact point after which the device can get powered off. Disable the RPM-reference-held check for their duration. - the IRQ, hangcheck and RPS work handlers These handlers are flushed in the system/runtime suspend handler before the device is powered off, so it's guaranteed that they won't run while the device is powered off even though they don't hold any RPM reference. Disable the RPM-reference-held check for their duration. In all these cases we still check that the device is not suspended. These explicit annotations also have the positive side effect of documenting our assumptions better. This caught additional WARNs from the atomic modeset path, those should be fixed separately. v2: - remove the redundant HAS_RUNTIME_PM check (moved to patch 1) (Ville) v3: - use a new dedicated RPM wakelock refcount to also catch cases where our own RPM get/put functions were not called (Chris) - assert also that the new RPM wakelock refcount is 0 in the RPM suspend handler (Chris) - change the assert error message to be more meaningful (Chris) - prevent false assert errors and check that the RPM wakelock is 0 in the RPM resume handler too - prevent false assert errors in the hangcheck work too - add a device not suspended assert check to the hangcheck work v4: - rename disable/enable_rpm_asserts to disable/enable_rpm_wakeref_asserts and wakelock_count to wakeref_count - disable the wakeref asserts in the IRQ handlers and RPS work too - update/clarify commit message v5: - mark places we plan to change to use proper RPM refcounting with separate DISABLE/ENABLE_RPM_WAKEREF_ASSERTS aliases (Chris) Signed-off-by: Imre Deak <imre.deak@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: http://patchwork.freedesktop.org/patch/msgid/1450227139-13471-1-git-send-email-imre.deak@intel.com
2015-12-16 07:52:19 +07:00
intel_runtime_pm_put(dev_priv);
i915_workqueues_cleanup(dev_priv);
out_free_priv:
kfree(dev_priv);
return ret;
}
int i915_driver_unload(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
int ret;
intel_fbdev_fini(dev);
i915_audio_component_cleanup(dev_priv);
ret = i915_gem_suspend(dev);
if (ret) {
DRM_ERROR("failed to idle hardware: %d\n", ret);
return ret;
}
intel_power_domains_fini(dev_priv);
intel_gpu_ips_teardown();
drm/i915: rc6 in sysfs Merge rc6 information into the power group for our device. Until now the i915 driver has not had any sysfs entries (aside from the connector stuff enabled by drm core). Since it seems like we're likely to have more in the future I created a new file for sysfs stubs, as well as the rc6 sysfs functions which don't really belong elsewhere (perhaps i915_suspend, but most of the stuff is in intel_display,c). displays rc6 modes enabled (as a hex mask): cat /sys/class/drm/card0/power/rc6_enable displays #ms GPU has been in rc6 since boot: cat /sys/class/drm/card0/power/rc6_residency_ms displays #ms GPU has been in deep rc6 since boot: cat /sys/class/drm/card0/power/rc6p_residency_ms displays #ms GPU has been in deepest rc6 since boot: cat /sys/class/drm/card0/power/rc6pp_residency_ms Important note: I've seen on SNB that even when RC6 is *not* enabled the rc6 register seems to have a random value in it. I can only guess at the reason reason for this. Those writing tools that utilize this value need to be careful and probably want to scrutinize the value very carefully. v2: use common rc6 residency units to milliseconds for the other RC6 types v3: don't create sysfs files for GEN <= 5 add a rc6_enable to show a mask of enabled rc6 types use unmerge instead of remove for sysfs group squash intel_enable_rc6() extraction into this patch v4: rename sysfs files (Chris) CC: Chris Wilson <chris@chris-wilson.co.uk> CC: Daniel Vetter <daniel.vetter@ffwll.ch>f CC: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Ben Widawsky <benjamin.widawsky@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: squash in the 64bit division fix by Chris Wilson.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-04-11 11:17:01 +07:00
i915_teardown_sysfs(dev);
i915_gem_shrinker_cleanup(dev_priv);
io_mapping_free(dev_priv->gtt.mappable);
arch_phys_wc_del(dev_priv->gtt.mtrr);
acpi_video_unregister();
drm/i915: call drm_vblank_cleanup() earlier at unload In its current place, it just segfaults while trying to access the CRTC structures: [ 9132.421681] Call Trace: [ 9132.421707] [<ffffffffa01130d8>] i915_get_crtc_scanoutpos+0x1e8/0x220 [i915] [ 9132.421727] [<ffffffffa001da34>] drm_calc_vbltimestamp_from_scanoutpos+0x94/0x330 [drm] [ 9132.421744] [<ffffffffa001d240>] ?vblank_disable_and_save+0x40/0x1e0 [drm] [ 9132.421769] [<ffffffffa0114328>] i915_get_vblank_timestamp+0x68/0xb0 [i915] [ 9132.421786] [<ffffffffa001d094>] drm_get_last_vbltimestamp+0x44/0x80 [drm] [ 9132.421801] [<ffffffffa001d3a6>] vblank_disable_and_save+0x1a6/0x1e0 [drm] [ 9132.421817] [<ffffffffa001eac1>] drm_vblank_cleanup+0x61/0xa0 [drm] [ 9132.421849] [<ffffffffa0177a5e>] i915_driver_unload+0xde/0x290 [i915] [ 9132.421867] [<ffffffffa0020264>] drm_dev_unregister+0x24/0xb0 [drm] [ 9132.421884] [<ffffffffa002090e>] drm_put_dev+0x1e/0x70 [drm] [ 9132.421901] [<ffffffffa00e01e0>] i915_pci_remove+0x10/0x20 [i915] [ 9132.421910] [<ffffffff81347556>] pci_device_remove+0x36/0xb0 [ 9132.421920] [<ffffffff8140084a>] __device_release_driver+0x7a/0xf0 [ 9132.421928] [<ffffffff81400fc8>] driver_detach+0xb8/0xc0 [ 9132.421936] [<ffffffff8140054a>] bus_remove_driver+0x4a/0xb0 [ 9132.421944] [<ffffffff81401717>] driver_unregister+0x27/0x50 [ 9132.421953] [<ffffffff81346f65>] pci_unregister_driver+0x25/0x70 [ 9132.421971] [<ffffffffa00229c8>] drm_pci_exit+0x78/0xa0 [drm] [ 9132.422000] [<ffffffffa017a6d2>] i915_exit+0x20/0x94e [i915] [ 9132.422009] [<ffffffff810fb9dc>] SyS_delete_module+0x13c/0x1f0 [ 9132.422019] [<ffffffff8131c5fb>] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 9132.422028] [<ffffffff816f7792>] system_call_fastpath+0x16/0x1b This means it has to be before intel_modeset_cleanup, which cleans the CRTC structures. But if we move it to before intel_fbdev_fini(), we get WARNs because intel_fbdev_fini() still tries to use the vblanks, so the only acceptable point for drm_vblank_cleanup() seems to be this place. Related commit: commit cbb47d179fb345c579cd8cd884693903fceed26a Author: Chris Wilson <chris@chris-wilson.co.uk> Date: Mon Sep 23 17:33:20 2013 -0300 drm/i915: Add some missing steps to i915_driver_load error path Testsuite: igt/drv_module_reload Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77511 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=83484 Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-10-16 00:15:04 +07:00
drm_vblank_cleanup(dev);
intel_modeset_cleanup(dev);
/*
* free the memory space allocated for the child device
* config parsed from VBT
*/
if (dev_priv->vbt.child_dev && dev_priv->vbt.child_dev_num) {
kfree(dev_priv->vbt.child_dev);
dev_priv->vbt.child_dev = NULL;
dev_priv->vbt.child_dev_num = 0;
}
kfree(dev_priv->vbt.sdvo_lvds_vbt_mode);
dev_priv->vbt.sdvo_lvds_vbt_mode = NULL;
kfree(dev_priv->vbt.lfp_lvds_vbt_mode);
dev_priv->vbt.lfp_lvds_vbt_mode = NULL;
vga_switcheroo_unregister_client(dev->pdev);
vga_client_register(dev->pdev, NULL, NULL, NULL);
intel_csr_ucode_fini(dev_priv);
/* Free error state after interrupts are fully disabled. */
cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
i915_destroy_error_state(dev);
if (dev->pdev->msi_enabled)
pci_disable_msi(dev->pdev);
intel_opregion_fini(dev);
/* Flush any outstanding unpin_work. */
flush_workqueue(dev_priv->wq);
intel_guc_ucode_fini(dev);
drm/i915: Fix locking around GuC firmware load The GuC firmware load requires struct_mutex to create a GEM object, but this collides badly with request_firmware. Move struct_mutex locking down into the loader itself, so we don't hold it across the entire load process, including request_firmware. [ 20.451400] ====================================================== [ 20.451420] [ INFO: possible circular locking dependency detected ] [ 20.451441] 4.3.0-rc5+ #1 Tainted: G W [ 20.451457] ------------------------------------------------------- [ 20.451477] plymouthd/371 is trying to acquire lock: [ 20.451494] (&dev->struct_mutex){+.+.+.}, at: [<ffffffffa0093c62>] drm_gem_mmap+0x112/0x290 [drm] [ 20.451538] but task is already holding lock: [ 20.451557] (&mm->mmap_sem){++++++}, at: [<ffffffff811fd9ac>] vm_mmap_pgoff+0x8c/0xf0 [ 20.451591] which lock already depends on the new lock. [ 20.451617] the existing dependency chain (in reverse order) is: [ 20.451640] -> #3 (&mm->mmap_sem){++++++}: [ 20.451661] [<ffffffff8110644e>] lock_acquire+0xce/0x1c0 [ 20.451683] [<ffffffff8120ec9a>] __might_fault+0x7a/0xa0 [ 20.451705] [<ffffffff8127e34e>] filldir+0x9e/0x130 [ 20.451726] [<ffffffff81295b86>] dcache_readdir+0x186/0x230 [ 20.451748] [<ffffffff8127e117>] iterate_dir+0x97/0x130 [ 20.451769] [<ffffffff8127e66a>] SyS_getdents+0x9a/0x130 [ 20.451790] [<ffffffff8184f2f2>] entry_SYSCALL_64_fastpath+0x12/0x76 [ 20.451829] -> #2 (&sb->s_type->i_mutex_key#2){+.+.+.}: [ 20.451852] [<ffffffff8110644e>] lock_acquire+0xce/0x1c0 [ 20.451872] [<ffffffff8184b516>] mutex_lock_nested+0x86/0x400 [ 20.451893] [<ffffffff81277790>] walk_component+0x1d0/0x2a0 [ 20.451914] [<ffffffff812779f0>] link_path_walk+0x190/0x5a0 [ 20.451935] [<ffffffff8127803b>] path_openat+0xab/0x1260 [ 20.451955] [<ffffffff8127a651>] do_filp_open+0x91/0x100 [ 20.451975] [<ffffffff81267e67>] file_open_name+0xf7/0x150 [ 20.451995] [<ffffffff81267ef3>] filp_open+0x33/0x60 [ 20.452014] [<ffffffff8157e1e7>] _request_firmware+0x277/0x880 [ 20.452038] [<ffffffff8157e9e4>] request_firmware_work_func+0x34/0x80 [ 20.452060] [<ffffffff810c7020>] process_one_work+0x230/0x680 [ 20.452082] [<ffffffff810c74be>] worker_thread+0x4e/0x450 [ 20.452102] [<ffffffff810ce511>] kthread+0x101/0x120 [ 20.452121] [<ffffffff8184f66f>] ret_from_fork+0x3f/0x70 [ 20.452140] -> #1 (umhelper_sem){++++.+}: [ 20.452159] [<ffffffff8110644e>] lock_acquire+0xce/0x1c0 [ 20.452178] [<ffffffff8184c5c1>] down_read+0x51/0xa0 [ 20.452197] [<ffffffff810c203b>] usermodehelper_read_trylock+0x5b/0x130 [ 20.452221] [<ffffffff8157e147>] _request_firmware+0x1d7/0x880 [ 20.452242] [<ffffffff8157e821>] request_firmware+0x31/0x50 [ 20.452262] [<ffffffffa01b54a4>] intel_guc_ucode_init+0xf4/0x400 [i915] [ 20.452305] [<ffffffffa0213913>] i915_driver_load+0xd63/0x16e0 [i915] [ 20.452343] [<ffffffffa00987d9>] drm_dev_register+0xa9/0xc0 [drm] [ 20.452369] [<ffffffffa009ae3d>] drm_get_pci_dev+0x8d/0x1e0 [drm] [ 20.452396] [<ffffffffa01521e4>] i915_pci_probe+0x34/0x50 [i915] [ 20.452421] [<ffffffff81464675>] local_pci_probe+0x45/0xa0 [ 20.452443] [<ffffffff81465a6d>] pci_device_probe+0xfd/0x140 [ 20.452464] [<ffffffff8156a2e4>] driver_probe_device+0x224/0x480 [ 20.452486] [<ffffffff8156a5c8>] __driver_attach+0x88/0x90 [ 20.452505] [<ffffffff81567cf3>] bus_for_each_dev+0x73/0xc0 [ 20.452526] [<ffffffff81569a7e>] driver_attach+0x1e/0x20 [ 20.452546] [<ffffffff815695ae>] bus_add_driver+0x1ee/0x280 [ 20.452566] [<ffffffff8156b100>] driver_register+0x60/0xe0 [ 20.453197] [<ffffffff81464050>] __pci_register_driver+0x60/0x70 [ 20.453845] [<ffffffffa009b070>] drm_pci_init+0xe0/0x110 [drm] [ 20.454497] [<ffffffffa027f092>] 0xffffffffa027f092 [ 20.455156] [<ffffffff81002123>] do_one_initcall+0xb3/0x200 [ 20.455796] [<ffffffff811d8c01>] do_init_module+0x5f/0x1e7 [ 20.456434] [<ffffffff8114c4e6>] load_module+0x2126/0x27d0 [ 20.457071] [<ffffffff8114cdf9>] SyS_finit_module+0xb9/0xf0 [ 20.457738] [<ffffffff8184f2f2>] entry_SYSCALL_64_fastpath+0x12/0x76 [ 20.458370] -> #0 (&dev->struct_mutex){+.+.+.}: [ 20.459773] [<ffffffff8110584f>] __lock_acquire+0x191f/0x1ba0 [ 20.460451] [<ffffffff8110644e>] lock_acquire+0xce/0x1c0 [ 20.461074] [<ffffffffa0093c88>] drm_gem_mmap+0x138/0x290 [drm] [ 20.461693] [<ffffffff8121a5ec>] mmap_region+0x3ec/0x670 [ 20.462298] [<ffffffff8121abb2>] do_mmap+0x342/0x420 [ 20.462901] [<ffffffff811fd9d2>] vm_mmap_pgoff+0xb2/0xf0 [ 20.463532] [<ffffffff81218f62>] SyS_mmap_pgoff+0x1f2/0x290 [ 20.464118] [<ffffffff8102187b>] SyS_mmap+0x1b/0x30 [ 20.464702] [<ffffffff8184f2f2>] entry_SYSCALL_64_fastpath+0x12/0x76 [ 20.465289] other info that might help us debug this: [ 20.467179] Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#2 --> &mm->mmap_sem [ 20.468928] Possible unsafe locking scenario: [ 20.470161] CPU0 CPU1 [ 20.470745] ---- ---- [ 20.471325] lock(&mm->mmap_sem); [ 20.471902] lock(&sb->s_type->i_mutex_key#2); [ 20.472538] lock(&mm->mmap_sem); [ 20.473118] lock(&dev->struct_mutex); [ 20.473704] *** DEADLOCK *** Signed-off-by: Daniel Stone <daniels@collabora.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
2015-11-04 04:42:31 +07:00
mutex_lock(&dev->struct_mutex);
i915_gem_cleanup_engines(dev);
i915_gem_context_fini(dev);
mutex_unlock(&dev->struct_mutex);
intel_fbc_cleanup_cfb(dev_priv);
drm/i915: irq-drive the dp aux communication At least on the platforms that have a dp aux irq and also have it enabled - vlvhsw should have one, too. But I don't have a machine to test this on. Judging from docs there's no dp aux interrupt for gm45. Also, I only have an ivb cpu edp machine, so the dp aux A code for snb/ilk is untested. For dpcd probing when nothing is connected it slashes about 5ms of cpu time (cpu time is now negligible), which agrees with 3 * 5 400 usec timeouts. A previous version of this patch increases the time required to go through the dp_detect cycle (which includes reading the edid) from around 33 ms to around 40 ms. Experiments indicated that this is purely due to the irq latency - the hw doesn't allow us to queue up dp aux transactions and hence irq latency directly affects throughput. gmbus is much better, there we have a 8 byte buffer, and we get the irq once another 4 bytes can be queued up. But by using the pm_qos interface to request the lowest possible cpu wake-up latency this slowdown completely disappeared. Since all our output detection logic is single-threaded with the mode_config mutex right now anyway, I've decide not ot play fancy and to just reuse the gmbus wait queue. But this would definitely prep the way to run dp detection on different ports in parallel v2: Add a timeout for dp aux transfers when using interrupts - the hw _does_ prevent this with the hw-based 400 usec timeout, but if the irq somehow doesn't arrive we're screwed. Lesson learned while developing this ;-) v3: While at it also convert the busy-loop to wait_for_atomic, so that we don't run the risk of an infinite loop any more. v4: Ensure we have the smallest possible irq latency by using the pm_qos interface. v5: Add a comment to the code to explain why we frob pm_qos. Suggested by Chris Wilson. v6: Disable dp irq for vlv, that's easier than trying to get at docs and hw. v7: Squash in a fix for Haswell that Paulo Zanoni tracked down - the dp aux registers aren't at a fixed offset any more, but can be on the PCH while the DP port is on the cpu die. Reviewed-by: Imre Deak <imre.deak@intel.com> (v6) Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-12-01 19:53:48 +07:00
pm_qos_remove_request(&dev_priv->pm_qos);
i915_global_gtt_cleanup(dev);
intel_uncore_fini(dev);
i915_mmio_cleanup(dev);
i915_gem_load_cleanup(dev);
pci_dev_put(dev_priv->bridge_dev);
i915_workqueues_cleanup(dev_priv);
kfree(dev_priv);
return 0;
}
int i915_driver_open(struct drm_device *dev, struct drm_file *file)
{
drm/i915: Boost RPS frequency for CPU stalls If we encounter a situation where the CPU blocks waiting for results from the GPU, give the GPU a kick to boost its the frequency. This should work to reduce user interface stalls and to quickly promote mesa to high frequencies - but the cost is that our requested frequency stalls high (as we do not idle for long enough before rc6 to start reducing frequencies, nor are we aggressive at down clocking an underused GPU). However, this should be mitigated by rc6 itself powering off the GPU when idle, and that energy use is dependent upon the workload of the GPU in addition to its frequency (e.g. the math or sampler functions only consume power when used). Still, this is likely to adversely affect light workloads. In particular, this nearly eliminates the highly noticeable wake-up lag in animations from idle. For example, expose or workspace transitions. (However, given the situation where we fail to downclock, our requested frequency is almost always the maximum, except for Baytrail where we manually downclock upon idling. This often masks the latency of upclocking after being idle, so animations are typically smooth - at the cost of increased power consumption.) Stéphane raised the concern that this will punish good applications and reward bad applications - but due to the nature of how mesa performs its client throttling, I believe all mesa applications will be roughly equally affected. To address this concern, and to prevent applications like compositors from permanently boosting the RPS state, we ratelimit the frequency of the wait-boosts each client recieves. Unfortunately, this techinique is ineffective with Ironlake - which also has dynamic render power states and suffers just as dramatically. For Ironlake, the thermal/power headroom is shared with the CPU through Intelligent Power Sharing and the intel-ips module. This leaves us with no GPU boost frequencies available when coming out of idle, and due to hardware limitations we cannot change the arbitration between the CPU and GPU quickly enough to be effective. v2: Limit each client to receiving a single boost for each active period. Tested by QA to only marginally increase power, and to demonstrably increase throughput in games. No latency measurements yet. v3: Cater for front-buffer rendering with manual throttling. v4: Tidy up. v5: Sadly the compositor needs frequent boosts as it may never idle, but due to its picking mechanism (using ReadPixels) may require frequent waits. Those waits, along with the waits for the vrefresh swap, conspire to keep the GPU at low frequencies despite the interactive latency. To overcome this we ditch the one-boost-per-active-period and just ratelimit the number of wait-boosts each client can receive. Reported-and-tested-by: Paul Neumann <paul104x@yahoo.de> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68716 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Stéphane Marchesin <stephane.marchesin@gmail.com> Cc: Owen Taylor <otaylor@redhat.com> Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com> Cc: "Zhuang, Lena" <lena.zhuang@intel.com> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> [danvet: No extern for function prototypes in headers.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-25 23:34:56 +07:00
int ret;
drm/i915: Boost RPS frequency for CPU stalls If we encounter a situation where the CPU blocks waiting for results from the GPU, give the GPU a kick to boost its the frequency. This should work to reduce user interface stalls and to quickly promote mesa to high frequencies - but the cost is that our requested frequency stalls high (as we do not idle for long enough before rc6 to start reducing frequencies, nor are we aggressive at down clocking an underused GPU). However, this should be mitigated by rc6 itself powering off the GPU when idle, and that energy use is dependent upon the workload of the GPU in addition to its frequency (e.g. the math or sampler functions only consume power when used). Still, this is likely to adversely affect light workloads. In particular, this nearly eliminates the highly noticeable wake-up lag in animations from idle. For example, expose or workspace transitions. (However, given the situation where we fail to downclock, our requested frequency is almost always the maximum, except for Baytrail where we manually downclock upon idling. This often masks the latency of upclocking after being idle, so animations are typically smooth - at the cost of increased power consumption.) Stéphane raised the concern that this will punish good applications and reward bad applications - but due to the nature of how mesa performs its client throttling, I believe all mesa applications will be roughly equally affected. To address this concern, and to prevent applications like compositors from permanently boosting the RPS state, we ratelimit the frequency of the wait-boosts each client recieves. Unfortunately, this techinique is ineffective with Ironlake - which also has dynamic render power states and suffers just as dramatically. For Ironlake, the thermal/power headroom is shared with the CPU through Intelligent Power Sharing and the intel-ips module. This leaves us with no GPU boost frequencies available when coming out of idle, and due to hardware limitations we cannot change the arbitration between the CPU and GPU quickly enough to be effective. v2: Limit each client to receiving a single boost for each active period. Tested by QA to only marginally increase power, and to demonstrably increase throughput in games. No latency measurements yet. v3: Cater for front-buffer rendering with manual throttling. v4: Tidy up. v5: Sadly the compositor needs frequent boosts as it may never idle, but due to its picking mechanism (using ReadPixels) may require frequent waits. Those waits, along with the waits for the vrefresh swap, conspire to keep the GPU at low frequencies despite the interactive latency. To overcome this we ditch the one-boost-per-active-period and just ratelimit the number of wait-boosts each client can receive. Reported-and-tested-by: Paul Neumann <paul104x@yahoo.de> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68716 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Stéphane Marchesin <stephane.marchesin@gmail.com> Cc: Owen Taylor <otaylor@redhat.com> Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com> Cc: "Zhuang, Lena" <lena.zhuang@intel.com> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> [danvet: No extern for function prototypes in headers.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-25 23:34:56 +07:00
ret = i915_gem_open(dev, file);
if (ret)
return ret;
drm/i915: preliminary context support Very basic code for context setup/destruction in the driver. Adds the file i915_gem_context.c This file implements HW context support. On gen5+ a HW context consists of an opaque GPU object which is referenced at times of context saves and restores. With RC6 enabled, the context is also referenced as the GPU enters and exists from RC6 (GPU has it's own internal power context, except on gen5). Though something like a context does exist for the media ring, the code only supports contexts for the render ring. In software, there is a distinction between contexts created by the user, and the default HW context. The default HW context is used by GPU clients that do not request setup of their own hardware context. The default context's state is never restored to help prevent programming errors. This would happen if a client ran and piggy-backed off another clients GPU state. The default context only exists to give the GPU some offset to load as the current to invoke a save of the context we actually care about. In fact, the code could likely be constructed, albeit in a more complicated fashion, to never use the default context, though that limits the driver's ability to swap out, and/or destroy other contexts. All other contexts are created as a request by the GPU client. These contexts store GPU state, and thus allow GPU clients to not re-emit state (and potentially query certain state) at any time. The kernel driver makes certain that the appropriate commands are inserted. There are 4 entry points into the contexts, init, fini, open, close. The names are self-explanatory except that init can be called during reset, and also during pm thaw/resume. As we expect our context to be preserved across these events, we do not reinitialize in this case. As Adam Jackson pointed out, The cutoff of 1MB where a HW context is considered too big is arbitrary. The reason for this is even though context sizes are increasing with every generation, they have yet to eclipse even 32k. If we somehow read back way more than that, it probably means BIOS has done something strange, or we're running on a platform that wasn't designed for this. v2: rename load/unload to init/fini (daniel) remove ILK support for get_size() (indirectly daniel) add HAS_HW_CONTEXTS macro to clarify supported platforms (daniel) added comments (Ben) Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
2012-06-05 04:42:42 +07:00
return 0;
}
/**
* i915_driver_lastclose - clean up after all DRM clients have exited
* @dev: DRM device
*
* Take care of cleaning up after all DRM clients have exited. In the
* mode setting case, we want to restore the kernel's initial mode (just
* in case the last client left us in a bad state).
*
* Additionally, in the non-mode setting case, we'll tear down the GTT
* and DMA structures, since the kernel won't be using them, and clea
* up any GEM state.
*/
void i915_driver_lastclose(struct drm_device *dev)
{
intel_fbdev_restore_mode(dev);
vga_switcheroo_process_delayed_switch();
}
void i915_driver_preclose(struct drm_device *dev, struct drm_file *file)
{
mutex_lock(&dev->struct_mutex);
i915_gem_context_close(dev, file);
i915_gem_release(dev, file);
mutex_unlock(&dev->struct_mutex);
}
void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
{
struct drm_i915_file_private *file_priv = file->driver_priv;
kfree(file_priv);
}
static int
i915_gem_reject_pin_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
return -ENODEV;
}
const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_FLUSH, drm_noop, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_FLIP, drm_noop, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_BATCHBUFFER, drm_noop, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_IRQ_EMIT, drm_noop, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_IRQ_WAIT, drm_noop, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_SETPARAM, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_ALLOC, drm_noop, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_FREE, drm_noop, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_INIT_HEAP, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_CMDBUFFER, drm_noop, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_DESTROY_HEAP, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_SET_VBLANK_PIPE, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GET_VBLANK_PIPE, drm_noop, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_VBLANK_SWAP, drm_noop, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_SET_CACHING, i915_gem_set_caching_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_GET_CACHING, i915_gem_get_caching_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, 0),
DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW),
DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW),
DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey, DRM_MASTER|DRM_CONTROL_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW),
};
int i915_max_ioctl = ARRAY_SIZE(i915_ioctls);