2005-04-17 05:20:36 +07:00
|
|
|
/* i915_drv.h -- Private header for the I915 driver -*- linux-c -*-
|
|
|
|
*/
|
2006-01-02 16:14:23 +07:00
|
|
|
/*
|
2005-06-23 19:46:46 +07:00
|
|
|
*
|
2005-04-17 05:20:36 +07:00
|
|
|
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
|
|
|
|
* All Rights Reserved.
|
2005-06-23 19:46:46 +07:00
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the
|
|
|
|
* "Software"), to deal in the Software without restriction, including
|
|
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
|
|
* the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the
|
|
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
|
|
* of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
|
|
|
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
|
|
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
*
|
2006-01-02 16:14:23 +07:00
|
|
|
*/
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#ifndef _I915_DRV_H_
|
|
|
|
#define _I915_DRV_H_
|
|
|
|
|
2012-12-04 04:03:14 +07:00
|
|
|
#include <uapi/drm/i915_drm.h>
|
|
|
|
|
2008-07-30 01:54:06 +07:00
|
|
|
#include "i915_reg.h"
|
DRM: i915: add mode setting support
This commit adds i915 driver support for the DRM mode setting APIs.
Currently, VGA, LVDS, SDVO DVI & VGA, TV and DVO LVDS outputs are
supported. HDMI, DisplayPort and additional SDVO output support will
follow.
Support for the mode setting code is controlled by the new 'modeset'
module option. A new config option, CONFIG_DRM_I915_KMS controls the
default behavior, and whether a PCI ID list is built into the module for
use by user level module utilities.
Note that if mode setting is enabled, user level drivers that access
display registers directly or that don't use the kernel graphics memory
manager will likely corrupt kernel graphics memory, disrupt output
configuration (possibly leading to hangs and/or blank displays), and
prevent panic/oops messages from appearing. So use caution when
enabling this code; be sure your user level code supports the new
interfaces.
A new SysRq key, 'g', provides emergency support for switching back to
the kernel's framebuffer console; which is useful for testing.
Co-authors: Dave Airlie <airlied@linux.ie>, Hong Liu <hong.liu@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2008-11-08 05:24:08 +07:00
|
|
|
#include "intel_bios.h"
|
2010-05-21 08:08:55 +07:00
|
|
|
#include "intel_ringbuffer.h"
|
2014-07-24 23:04:10 +07:00
|
|
|
#include "intel_lrc.h"
|
drm/i915: Split out GTT specific header file
This file contains all necessary defines, prototypes and typedefs for
manipulating GEN graphics address translation (this does not include the
legacy AGP driver)
Reiterating the comment in the header,
"Please try to maintain the following order within this file unless it
makes sense to do otherwise. From top to bottom:
1. typedefs
2. #defines, and macros
3. structure definitions
4. function prototypes
Within each section, please try to order by generation in ascending
order, from top to bottom (ie. GEN6 on the top, GEN8 on the bottom)."
I've made some minor cleanups, and fixed a couple of typos while here -
but there should be no functional changes.
The purpose of the patch is to reduce clutter in our main header file,
making room for new growth, and make documentation of our interfaces
easier by splitting things out.
With a little more work, like making i915_gtt a pointer, we could
potentially completely isolate this header from i915_drv.h. At the
moment however, I don't think it's worth the effort.
Personally, I would have liked to put the PTE encoding functions in this
file too, but I didn't want to rock the boat too much.
A similar patch has been in use on my machine for some time. This exact
patch though has only been compile tested.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-03-23 12:47:21 +07:00
|
|
|
#include "i915_gem_gtt.h"
|
2014-08-21 17:40:54 +07:00
|
|
|
#include "i915_gem_render_state.h"
|
2008-10-31 09:38:48 +07:00
|
|
|
#include <linux/io-mapping.h>
|
2010-07-21 05:44:45 +07:00
|
|
|
#include <linux/i2c.h>
|
2012-02-28 06:43:09 +07:00
|
|
|
#include <linux/i2c-algo-bit.h>
|
2010-08-25 03:18:41 +07:00
|
|
|
#include <drm/intel-gtt.h>
|
2014-09-11 12:43:25 +07:00
|
|
|
#include <drm/drm_legacy.h> /* for struct drm_dma_handle */
|
2014-09-23 20:46:53 +07:00
|
|
|
#include <drm/drm_gem.h>
|
2011-08-12 17:11:33 +07:00
|
|
|
#include <linux/backlight.h>
|
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
By exporting the ability to map user address and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forwarding porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer
with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
rearrange error path to destroy the mmu_notifier locklessly.
Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
allocations of the same userptr range - and notice that
struct_mutex was presumed to be held when during creation it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the
hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required
infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to
be used to fix up the kernel thread's current->mm for use with
copy_user().
v22: Use sg_alloc_table_from_pages for that chunky feeling
v23: Export a function for sanity checking dma-buf rather than encode
userptr details elsewhere, and clean up comments based on
suggestions by Bradley.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com>
[danvet: Frob ioctl allocation to pick the next one - will cause a bit
of fuss with create2 apparently, but such are the rules.]
[danvet2: oops, forgot to git add after manual patch application]
[danvet3: Appease sparse.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 20:22:37 +07:00
|
|
|
#include <linux/hashtable.h>
|
2012-04-06 04:47:36 +07:00
|
|
|
#include <linux/intel-iommu.h>
|
2012-04-27 20:17:39 +07:00
|
|
|
#include <linux/kref.h>
|
drm/i915: irq-drive the dp aux communication
At least on the platforms that have a dp aux irq and also have it
enabled - vlvhsw should have one, too. But I don't have a machine to
test this on. Judging from docs there's no dp aux interrupt for gm45.
Also, I only have an ivb cpu edp machine, so the dp aux A code for
snb/ilk is untested.
For dpcd probing when nothing is connected it slashes about 5ms of cpu
time (cpu time is now negligible), which agrees with 3 * 5 400 usec
timeouts.
A previous version of this patch increases the time required to go
through the dp_detect cycle (which includes reading the edid) from
around 33 ms to around 40 ms. Experiments indicated that this is
purely due to the irq latency - the hw doesn't allow us to queue up
dp aux transactions and hence irq latency directly affects throughput.
gmbus is much better, there we have a 8 byte buffer, and we get the
irq once another 4 bytes can be queued up.
But by using the pm_qos interface to request the lowest possible cpu
wake-up latency this slowdown completely disappeared.
Since all our output detection logic is single-threaded with the
mode_config mutex right now anyway, I've decided not to play fancy and
to just reuse the gmbus wait queue. But this would definitely prep the
way to run dp detection on different ports in parallel
v2: Add a timeout for dp aux transfers when using interrupts - the hw
_does_ prevent this with the hw-based 400 usec timeout, but if the
irq somehow doesn't arrive we're screwed. Lesson learned while
developing this ;-)
v3: While at it also convert the busy-loop to wait_for_atomic, so that
we don't run the risk of an infinite loop any more.
v4: Ensure we have the smallest possible irq latency by using the
pm_qos interface.
v5: Add a comment to the code to explain why we frob pm_qos. Suggested
by Chris Wilson.
v6: Disable dp irq for vlv, that's easier than trying to get at docs
and hw.
v7: Squash in a fix for Haswell that Paulo Zanoni tracked down - the
dp aux registers aren't at a fixed offset any more, but can be on the
PCH while the DP port is on the cpu die.
Reviewed-by: Imre Deak <imre.deak@intel.com> (v6)
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-12-01 19:53:48 +07:00
|
|
|
#include <linux/pm_qos.h>
|
2008-07-30 01:54:06 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* General customization:
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define DRIVER_NAME "i915"
|
|
|
|
#define DRIVER_DESC "Intel Graphics"
|
2014-11-21 16:37:14 +07:00
|
|
|
#define DRIVER_DATE "20141121"
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2014-10-28 22:32:30 +07:00
|
|
|
#undef WARN_ON
|
|
|
|
/*
 * Driver-local WARN_ON that also prints the failing expression.
 *
 * The stringified expression is passed as a "%s" argument rather than as
 * the format string itself, so a '%' inside x (e.g. a modulo) is printed
 * literally instead of being parsed as a printf conversion. x is
 * parenthesized where it is evaluated.
 */
#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" #x ")")
|
|
|
|
|
2008-08-26 05:11:06 +07:00
|
|
|
/*
 * Display pipe identifiers.
 *
 * INVALID_PIPE is -1; PIPE_A, PIPE_B and PIPE_C are 0, 1 and 2.
 * _PIPE_EDP takes the next value (3) and I915_MAX_PIPES is an alias of
 * it, so I915_MAX_PIPES evaluates to 3.
 */
enum pipe {
	INVALID_PIPE = -1,
	PIPE_A = 0,
	PIPE_B,
	PIPE_C,
	_PIPE_EDP,
	I915_MAX_PIPES = _PIPE_EDP
};
|
2011-02-08 03:26:52 +07:00
|
|
|
/* Letter for a pipe index: 0 -> 'A', 1 -> 'B', 2 -> 'C'. */
#define pipe_name(p) ('A' + (p))
|
2008-08-26 05:11:06 +07:00
|
|
|
|
drm/i915: add TRANSCODER_EDP
Before Haswell we used to have the CPU pipes and the PCH transcoders.
We had the same amount of pipes and transcoders, and there was a 1:1
mapping between them. After Haswell what we used to call CPU pipe was
split into CPU pipe and CPU transcoder. So now we have 3 CPU pipes (A,
B and C), 4 CPU transcoders (A, B, C and EDP) and 1 PCH transcoder
(only used for VGA).
For all the outputs except for EDP we have an 1:1 mapping on the CPU
pipes and CPU transcoders, so if you're using CPU pipe A you have to
use CPU transcoder A. When have an eDP output you have to use
transcoder EDP and you can attach this CPU transcoder to any of the 3
CPU pipes. When using VGA you need to select a pair of matching CPU
pipes/transcoders (A/A, B/B, C/C) and you also need to enable/use the
PCH transcoder.
For now we're just creating the cpu_transcoder definitions and setting
cpu_transcoder to TRANSCODER_EDP on DDI eDP code, but none of the
registers was ported to use transcoder instead of pipe. The goal is to
keep the code backwards-compatible since on all cases except when
using eDP we must have pipe == cpu_transcoder.
V2: Comment the haswell_crtc_off chunk, suggested by Damien Lespiau
and Daniel Vetter.
We currently need the haswell_crtc_off chunk because TRANSCODER_EDP
can be used by any CRTC, so when you stop using it you have to stop
saying you're using it, otherwise you may have at some point 2 CRTCs
claiming they're using TRANSCODER_EDP (a disabled CRTC and an enabled
one), then the HW state readout code will get completely confused.
In other words:
Imagine the following case:
xrandr --output eDP1 --auto --crtc 0
xrandr --output eDP1 --off
xrandr --output eDP1 --auto --crtc 2
After the last command you could get a "pipe A assertion failure
(expected off, current on)" because CRTC 0 still claims it's using
TRANSCODER_EDP, so the HW state readout function will read it
(through PIPECONF) and expect it to be off, when it's actually on
because it's being used by CRTC 2.
So when we make "intel_crtc->cpu_transcoder = intel_crtc->pipe" we
make sure we're pointing to our own original CRTC which is certainly
not used by any other CRTC.
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-10-25 00:59:34 +07:00
|
|
|
/*
 * CPU transcoder identifiers.
 *
 * TRANSCODER_A, _B and _C are 0, 1 and 2; TRANSCODER_EDP follows at 3.
 * I915_MAX_TRANSCODERS is the number of entries and must stay last.
 */
enum transcoder {
	TRANSCODER_A = 0,
	TRANSCODER_B,
	TRANSCODER_C,
	TRANSCODER_EDP,
	I915_MAX_TRANSCODERS
};
|
|
|
|
/*
 * Letter for a transcoder index: 0 -> 'A', 1 -> 'B', ...
 * Note that TRANSCODER_EDP (3) therefore maps to 'D'.
 */
#define transcoder_name(t) ('A' + (t))
|
|
|
|
|
2014-03-28 01:48:32 +07:00
|
|
|
/*
|
|
|
|
* This is the maximum (across all platforms) number of planes (primary +
|
|
|
|
* sprites) that can be active at the same time on one pipe.
|
|
|
|
*
|
|
|
|
* This value doesn't count the cursor plane.
|
|
|
|
*/
|
|
|
|
#define I915_MAX_PLANES 3
|
|
|
|
|
2009-09-11 05:28:06 +07:00
|
|
|
/* Display plane identifiers: PLANE_A, PLANE_B and PLANE_C are 0, 1 and 2. */
enum plane {
	PLANE_A = 0,
	PLANE_B,
	PLANE_C,
};
|
2011-02-08 03:26:52 +07:00
|
|
|
/* Letter for a plane index: 0 -> 'A', 1 -> 'B', 2 -> 'C'. */
#define plane_name(p) ('A' + (p))
|
2008-11-19 00:30:25 +07:00
|
|
|
|
2014-03-04 00:31:48 +07:00
|
|
|
/*
 * Letter for sprite s on pipe p: the sprite index is offset by
 * p * num_sprites[p] and then mapped onto 'A', 'B', ...
 * NOTE: expands to a reference to a variable named `dev` that must be in
 * scope at the call site; it is not a macro parameter.
 */
#define sprite_name(p, s) ('A' + (s) + (p) * INTEL_INFO(dev)->num_sprites[(p)])
|
2013-04-17 21:48:51 +07:00
|
|
|
|
2012-03-29 22:32:22 +07:00
|
|
|
/*
 * Port identifiers: PORT_A .. PORT_E are 0 .. 4.
 * I915_MAX_PORTS is the number of entries and must stay last.
 */
enum port {
	PORT_A = 0,
	PORT_B,
	PORT_C,
	PORT_D,
	PORT_E,
	I915_MAX_PORTS
};
|
|
|
|
/* Letter for a port index: 0 -> 'A', 1 -> 'B', ... 4 -> 'E'. */
#define port_name(p) ('A' + (p))
|
|
|
|
|
2014-04-09 17:28:14 +07:00
|
|
|
#define I915_NUM_PHYS_VLV 2
|
2013-11-06 13:36:35 +07:00
|
|
|
|
|
|
|
/* DPIO channel index: DPIO_CH0 is 0, DPIO_CH1 is 1. */
enum dpio_channel {
	DPIO_CH0,
	DPIO_CH1
};
|
|
|
|
|
|
|
|
/* DPIO PHY index: DPIO_PHY0 is 0, DPIO_PHY1 is 1. */
enum dpio_phy {
	DPIO_PHY0,
	DPIO_PHY1
};
|
|
|
|
|
2013-05-03 22:15:36 +07:00
|
|
|
/*
 * Display power domain identifiers.
 *
 * The per-pipe, per-pipe panel-fitter and per-transcoder (A..C) entries
 * are each declared as consecutive runs; the POWER_DOMAIN_PIPE(),
 * POWER_DOMAIN_PIPE_PANEL_FITTER() and POWER_DOMAIN_TRANSCODER() macros
 * index into those runs by addition, so the order must not change.
 *
 * POWER_DOMAIN_NUM is the number of domains and must stay last.
 */
enum intel_display_power_domain {
	POWER_DOMAIN_PIPE_A,
	POWER_DOMAIN_PIPE_B,
	POWER_DOMAIN_PIPE_C,
	POWER_DOMAIN_PIPE_A_PANEL_FITTER,
	POWER_DOMAIN_PIPE_B_PANEL_FITTER,
	POWER_DOMAIN_PIPE_C_PANEL_FITTER,
	POWER_DOMAIN_TRANSCODER_A,
	POWER_DOMAIN_TRANSCODER_B,
	POWER_DOMAIN_TRANSCODER_C,
	POWER_DOMAIN_TRANSCODER_EDP,
	POWER_DOMAIN_PORT_DDI_A_2_LANES,
	POWER_DOMAIN_PORT_DDI_A_4_LANES,
	POWER_DOMAIN_PORT_DDI_B_2_LANES,
	POWER_DOMAIN_PORT_DDI_B_4_LANES,
	POWER_DOMAIN_PORT_DDI_C_2_LANES,
	POWER_DOMAIN_PORT_DDI_C_4_LANES,
	POWER_DOMAIN_PORT_DDI_D_2_LANES,
	POWER_DOMAIN_PORT_DDI_D_4_LANES,
	POWER_DOMAIN_PORT_DSI,
	POWER_DOMAIN_PORT_CRT,
	POWER_DOMAIN_PORT_OTHER,
	POWER_DOMAIN_VGA,
	POWER_DOMAIN_AUDIO,
	POWER_DOMAIN_PLLS,
	POWER_DOMAIN_INIT,

	POWER_DOMAIN_NUM,
};
|
|
|
|
|
|
|
|
/* Power domain for a pipe index, counted from POWER_DOMAIN_PIPE_A. */
#define POWER_DOMAIN_PIPE(pipe) (POWER_DOMAIN_PIPE_A + (pipe))
|
|
|
|
/*
 * Panel-fitter power domain for a pipe index, counted from
 * POWER_DOMAIN_PIPE_A_PANEL_FITTER.
 */
#define POWER_DOMAIN_PIPE_PANEL_FITTER(pipe) \
	((pipe) + POWER_DOMAIN_PIPE_A_PANEL_FITTER)
|
2013-10-16 21:25:48 +07:00
|
|
|
/*
 * Power domain for a transcoder. TRANSCODER_EDP maps to its dedicated
 * domain; any other value is added to POWER_DOMAIN_TRANSCODER_A.
 * Note that tran is evaluated more than once.
 */
#define POWER_DOMAIN_TRANSCODER(tran) \
	((tran) == TRANSCODER_EDP ? POWER_DOMAIN_TRANSCODER_EDP : \
	 (tran) + POWER_DOMAIN_TRANSCODER_A)
|
2013-05-03 22:15:36 +07:00
|
|
|
|
2013-02-26 00:06:49 +07:00
|
|
|
/*
 * Hotplug-detect pin identifiers.
 *
 * HPD_PORT_A and HPD_TV are aliases of HPD_NONE (0). The remaining pins
 * count up from HPD_CRT (1), and HPD_NUM_PINS is the resulting count.
 */
enum hpd_pin {
	HPD_NONE = 0,
	HPD_PORT_A = HPD_NONE, /* PORT_A is internal */
	HPD_TV = HPD_NONE,     /* TV is known to be unreliable */
	HPD_CRT,
	HPD_SDVO_B,
	HPD_SDVO_C,
	HPD_PORT_B,
	HPD_PORT_C,
	HPD_PORT_D,
	HPD_NUM_PINS
};
|
|
|
|
|
2012-12-03 18:49:06 +07:00
|
|
|
/*
 * Bitwise OR of the render, sampler, command, instruction and vertex
 * GEM domain flags.
 */
#define I915_GEM_GPU_DOMAINS \
	(I915_GEM_DOMAIN_RENDER | \
	 I915_GEM_DOMAIN_SAMPLER | \
	 I915_GEM_DOMAIN_COMMAND | \
	 I915_GEM_DOMAIN_INSTRUCTION | \
	 I915_GEM_DOMAIN_VERTEX)
|
2010-05-22 03:26:39 +07:00
|
|
|
|
2014-08-18 19:49:10 +07:00
|
|
|
/*
 * Loop header: iterate __p from 0 up to, but not including,
 * INTEL_INFO(__dev_priv)->num_pipes.
 */
#define for_each_pipe(__dev_priv, __p) \
	for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++)
|
2014-09-04 18:27:43 +07:00
|
|
|
/*
 * Loop header: iterate p from 0 through num_sprites[pipe] inclusive,
 * i.e. num_sprites[pipe] + 1 iterations.
 * NOTE: expands to a reference to a variable named `dev` that must be in
 * scope at the call site; it is not a macro parameter.
 */
#define for_each_plane(pipe, p) \
	for ((p) = 0; (p) < INTEL_INFO(dev)->num_sprites[(pipe)] + 1; (p)++)
|
2014-03-04 00:31:48 +07:00
|
|
|
/*
 * Loop header: iterate s from 0 up to, but not including,
 * num_sprites[p].
 * NOTE: expands to a reference to a variable named `dev` that must be in
 * scope at the call site; it is not a macro parameter.
 */
#define for_each_sprite(p, s) \
	for ((s) = 0; INTEL_INFO(dev)->num_sprites[(p)] > (s); ++(s))
|
2011-02-08 03:26:52 +07:00
|
|
|
|
2014-05-14 05:32:23 +07:00
|
|
|
/*
 * Loop header: walk (dev)->mode_config.crtc_list with
 * list_for_each_entry(), linking through the `head` member of each entry.
 * dev is parenthesized so that any pointer expression may be passed.
 */
#define for_each_crtc(dev, crtc) \
	list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head)
|
|
|
|
|
2014-05-14 05:32:21 +07:00
|
|
|
/*
 * Loop header: walk (dev)->mode_config.crtc_list with
 * list_for_each_entry(), linking through the `base.head` member of each
 * entry. dev is parenthesized so that any pointer expression may be
 * passed.
 */
#define for_each_intel_crtc(dev, intel_crtc) \
	list_for_each_entry(intel_crtc, &(dev)->mode_config.crtc_list, base.head)
|
|
|
|
|
2014-08-05 17:29:37 +07:00
|
|
|
/*
 * Loop header: walk (dev)->mode_config.encoder_list with
 * list_for_each_entry(), linking through the `base.head` member of each
 * entry.
 */
#define for_each_intel_encoder(dev, intel_encoder) \
	list_for_each_entry(intel_encoder, \
			    &(dev)->mode_config.encoder_list, \
			    base.head)
|
|
|
|
|
2012-07-05 14:50:24 +07:00
|
|
|
/*
 * Loop header: walk (dev)->mode_config.encoder_list and run the loop
 * body only for entries whose base.crtc equals __crtc.
 *
 * The filter is written as "if (!match) {} else" so that the macro
 * already owns its else branch. An `else` that the caller writes after
 * the loop body can then no longer attach to the macro's hidden `if`.
 */
#define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \
	list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \
		if ((intel_encoder)->base.crtc != (__crtc)) {} else
|
|
|
|
|
2014-02-08 03:48:15 +07:00
|
|
|
/*
 * Loop header: walk (dev)->mode_config.connector_list and run the loop
 * body only for entries whose base.encoder equals __encoder.
 *
 * The filter is written as "if (!match) {} else" so that the macro
 * already owns its else branch. An `else` that the caller writes after
 * the loop body can then no longer attach to the macro's hidden `if`.
 */
#define for_each_connector_on_encoder(dev, __encoder, intel_connector) \
	list_for_each_entry((intel_connector), &(dev)->mode_config.connector_list, base.head) \
		if ((intel_connector)->base.encoder != (__encoder)) {} else
|
|
|
|
|
2014-07-12 11:32:27 +07:00
|
|
|
/*
 * Loop header: iterate domain over 0 .. POWER_DOMAIN_NUM - 1 and run the
 * loop body only for domains whose bit is set in mask.
 *
 * The bit test uses an unsigned 1 so the shift stays well defined for
 * bit 31. The filter is written as "if (!set) {} else" so that an `else`
 * the caller writes after the loop body cannot attach to the macro's
 * hidden `if`.
 */
#define for_each_power_domain(domain, mask) \
	for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \
		if (!((1u << (domain)) & (mask))) {} else
|
|
|
|
|
2013-06-05 18:34:14 +07:00
|
|
|
struct drm_i915_private;
|
2014-08-07 20:20:40 +07:00
|
|
|
struct i915_mm_struct;
|
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
By exporting the ability to map user address and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forwarding porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer
with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
rearrange error path to destroy the mmu_notifier locklessly.
Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
allocations of the same userptr range - and notice that
struct_mutex was presumed to be held when during creation it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the
hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required
infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to
be used to fix up the kernel thread's current->mm for use with
copy_user().
v22: Use sg_alloc_table_from_pages for that chunky feeling
v23: Export a function for sanity checking dma-buf rather than encode
userptr details elsewhere, and clean up comments based on
suggestions by Bradley.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com>
[danvet: Frob ioctl allocation to pick the next one - will cause a bit
of fuss with create2 apparently, but such are the rules.]
[danvet2: oops, forgot to git add after manual patch application]
[danvet3: Appease sparse.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 20:22:37 +07:00
|
|
|
struct i915_mmu_object;
|
2013-06-05 18:34:14 +07:00
|
|
|
|
2013-06-05 18:34:12 +07:00
|
|
|
/*
 * Shared DPLL identifiers.
 *
 * The same small non-negative values are reused under different names
 * for each of the groups below (pch, hsw/bdw, skl), so e.g.
 * DPLL_ID_PCH_PLL_A, DPLL_ID_WRPLL1 and DPLL_ID_SKL_DPLL1 all equal 0.
 */
enum intel_dpll_id {
	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
	/* real shared dpll ids must be >= 0 */
	DPLL_ID_PCH_PLL_A = 0,
	DPLL_ID_PCH_PLL_B = 1,
	/* hsw/bdw */
	DPLL_ID_WRPLL1 = 0,
	DPLL_ID_WRPLL2 = 1,
	/* skl */
	DPLL_ID_SKL_DPLL1 = 0,
	DPLL_ID_SKL_DPLL2 = 1,
	DPLL_ID_SKL_DPLL3 = 2,
};
|
2014-11-13 21:55:14 +07:00
|
|
|
#define I915_NUM_PLLS 3
|
2013-06-05 18:34:12 +07:00
|
|
|
|
2013-06-05 18:34:16 +07:00
|
|
|
/*
 * Register values describing one DPLL. The fields are grouped by the
 * hardware family that uses them.
 */
struct intel_dpll_hw_state {
	/* i9xx, pch plls */
	uint32_t dpll;
	uint32_t dpll_md;
	uint32_t fp0;
	uint32_t fp1;

	/* hsw, bdw */
	uint32_t wrpll;

	/* skl */
	/*
	 * DPLL_CTRL1 has 6 bits for each DPLL. We store those in the
	 * lower part of ctrl1 and they get shifted into position when
	 * writing the register. This allows us to easily compare the
	 * state to share the DPLL.
	 */
	uint32_t ctrl1;
	/* HDMI only, 0 when used for DP */
	uint32_t cfgcr1, cfgcr2;
};
|
|
|
|
|
2014-10-29 16:32:32 +07:00
|
|
|
struct intel_shared_dpll_config {
|
2014-10-29 16:32:31 +07:00
|
|
|
unsigned crtc_mask; /* mask of CRTCs sharing this PLL */
|
2014-10-29 16:32:32 +07:00
|
|
|
struct intel_dpll_hw_state hw_state;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct intel_shared_dpll {
|
|
|
|
struct intel_shared_dpll_config config;
|
2014-10-29 16:32:33 +07:00
|
|
|
struct intel_shared_dpll_config *new_config;
|
|
|
|
|
2012-04-20 23:11:53 +07:00
|
|
|
int active; /* count of number of active CRTCs (i.e. DPMS on) */
|
|
|
|
bool on; /* is the PLL actually active? Disabled during modeset */
|
2013-06-05 18:34:12 +07:00
|
|
|
const char *name;
|
|
|
|
/* should match the index in the dev_priv->shared_dplls array */
|
|
|
|
enum intel_dpll_id id;
|
2014-06-26 02:01:58 +07:00
|
|
|
/* The mode_set hook is optional and should be used together with the
|
|
|
|
* intel_prepare_shared_dpll function. */
|
2013-06-05 18:34:23 +07:00
|
|
|
void (*mode_set)(struct drm_i915_private *dev_priv,
|
|
|
|
struct intel_shared_dpll *pll);
|
2013-06-05 18:34:14 +07:00
|
|
|
void (*enable)(struct drm_i915_private *dev_priv,
|
|
|
|
struct intel_shared_dpll *pll);
|
|
|
|
void (*disable)(struct drm_i915_private *dev_priv,
|
|
|
|
struct intel_shared_dpll *pll);
|
2013-06-05 18:34:16 +07:00
|
|
|
bool (*get_hw_state)(struct drm_i915_private *dev_priv,
|
|
|
|
struct intel_shared_dpll *pll,
|
|
|
|
struct intel_dpll_hw_state *hw_state);
|
2012-04-20 23:11:53 +07:00
|
|
|
};
|
|
|
|
|
2014-11-13 21:55:14 +07:00
|
|
|
#define SKL_DPLL0 0
|
|
|
|
#define SKL_DPLL1 1
|
|
|
|
#define SKL_DPLL2 2
|
|
|
|
#define SKL_DPLL3 3
|
|
|
|
|
2012-11-29 21:59:36 +07:00
|
|
|
/* Used by dp and fdi links */
|
|
|
|
struct intel_link_m_n {
|
|
|
|
uint32_t tu;
|
|
|
|
uint32_t gmch_m;
|
|
|
|
uint32_t gmch_n;
|
|
|
|
uint32_t link_m;
|
|
|
|
uint32_t link_n;
|
|
|
|
};
|
|
|
|
|
|
|
|
void intel_link_compute_m_n(int bpp, int nlanes,
|
|
|
|
int pixel_clock, int link_clock,
|
|
|
|
struct intel_link_m_n *m_n);
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Interface history:
|
|
|
|
*
|
|
|
|
* 1.1: Original.
|
2006-01-02 16:14:23 +07:00
|
|
|
* 1.2: Add Power Management
|
|
|
|
* 1.3: Add vblank support
|
2006-01-25 11:31:43 +07:00
|
|
|
* 1.4: Fix cmdbuffer path, add heap destroy
|
2006-06-24 14:07:34 +07:00
|
|
|
* 1.5: Add vblank pipe configuration
|
2006-10-24 22:05:09 +07:00
|
|
|
* 1.6: - New ioctl for scheduling buffer swaps on vertical blank
|
|
|
|
* - Support vertical blank on secondary display pipe
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
|
|
|
#define DRIVER_MAJOR 1
|
2006-10-24 22:05:09 +07:00
|
|
|
#define DRIVER_MINOR 6
|
2005-04-17 05:20:36 +07:00
|
|
|
#define DRIVER_PATCHLEVEL 0
|
|
|
|
|
2010-09-29 22:10:57 +07:00
|
|
|
#define WATCH_LISTS 0
|
2008-07-31 02:06:12 +07:00
|
|
|
|
2008-10-01 02:14:26 +07:00
|
|
|
struct opregion_header;
|
|
|
|
struct opregion_acpi;
|
|
|
|
struct opregion_swsci;
|
|
|
|
struct opregion_asle;
|
|
|
|
|
2008-08-06 01:37:25 +07:00
|
|
|
struct intel_opregion {
|
2012-04-17 04:07:42 +07:00
|
|
|
struct opregion_header __iomem *header;
|
|
|
|
struct opregion_acpi __iomem *acpi;
|
|
|
|
struct opregion_swsci __iomem *swsci;
|
2013-09-02 14:38:59 +07:00
|
|
|
u32 swsci_gbda_sub_functions;
|
|
|
|
u32 swsci_sbcb_sub_functions;
|
2012-04-17 04:07:42 +07:00
|
|
|
struct opregion_asle __iomem *asle;
|
|
|
|
void __iomem *vbt;
|
2011-01-17 02:37:30 +07:00
|
|
|
u32 __iomem *lid_state;
|
2013-10-31 23:55:48 +07:00
|
|
|
struct work_struct asle_work;
|
2008-08-06 01:37:25 +07:00
|
|
|
};
|
2010-08-19 22:09:23 +07:00
|
|
|
#define OPREGION_SIZE (8*1024)
|
2008-08-06 01:37:25 +07:00
|
|
|
|
2010-08-05 02:26:07 +07:00
|
|
|
struct intel_overlay;
|
|
|
|
struct intel_overlay_error_state;
|
|
|
|
|
2008-11-13 01:03:55 +07:00
|
|
|
/* Negative sentinel value; parenthesized so the minus sign cannot bind to
 * neighbouring tokens wherever the macro is expanded. */
#define I915_FENCE_REG_NONE (-1)
|
2013-04-09 17:02:47 +07:00
|
|
|
#define I915_MAX_NUM_FENCES 32
|
|
|
|
/* 32 fences + sign bit for FENCE_REG_NONE */
|
|
|
|
#define I915_MAX_NUM_FENCE_BITS 6
|
2008-11-13 01:03:55 +07:00
|
|
|
|
|
|
|
struct drm_i915_fence_reg {
|
2010-04-28 16:02:31 +07:00
|
|
|
struct list_head lru_list;
|
2010-11-12 20:53:37 +07:00
|
|
|
struct drm_i915_gem_object *obj;
|
2011-12-14 19:57:08 +07:00
|
|
|
int pin_count;
|
2008-11-13 01:03:55 +07:00
|
|
|
};
|
2008-11-28 11:22:24 +07:00
|
|
|
|
2009-05-31 16:17:17 +07:00
|
|
|
struct sdvo_device_mapping {
|
2010-09-24 18:52:03 +07:00
|
|
|
u8 initialized;
|
2009-05-31 16:17:17 +07:00
|
|
|
u8 dvo_port;
|
|
|
|
u8 slave_addr;
|
|
|
|
u8 dvo_wiring;
|
2010-09-24 18:52:03 +07:00
|
|
|
u8 i2c_pin;
|
2010-04-24 03:07:40 +07:00
|
|
|
u8 ddc_pin;
|
2009-05-31 16:17:17 +07:00
|
|
|
};
|
|
|
|
|
2010-11-21 20:12:35 +07:00
|
|
|
struct intel_display_error_state;
|
|
|
|
|
2009-06-19 06:56:52 +07:00
|
|
|
struct drm_i915_error_state {
|
2012-04-27 20:17:39 +07:00
|
|
|
struct kref ref;
|
2014-01-30 15:19:37 +07:00
|
|
|
struct timeval time;
|
|
|
|
|
2014-02-25 22:11:25 +07:00
|
|
|
char error_msg[128];
|
2014-02-25 22:11:27 +07:00
|
|
|
u32 reset_count;
|
2014-02-25 22:11:28 +07:00
|
|
|
u32 suspend_count;
|
2014-02-25 22:11:25 +07:00
|
|
|
|
2014-01-30 15:19:37 +07:00
|
|
|
/* Generic register state */
|
2009-06-19 06:56:52 +07:00
|
|
|
u32 eir;
|
|
|
|
u32 pgtbl_er;
|
2012-04-27 06:03:00 +07:00
|
|
|
u32 ier;
|
2014-08-06 00:07:13 +07:00
|
|
|
u32 gtier[4];
|
2012-06-05 04:42:52 +07:00
|
|
|
u32 ccid;
|
2013-01-15 19:05:55 +07:00
|
|
|
u32 derrmr;
|
|
|
|
u32 forcewake;
|
2014-01-30 15:19:37 +07:00
|
|
|
u32 error; /* gen6+ */
|
|
|
|
u32 err_int; /* gen7 */
|
|
|
|
u32 done_reg;
|
2014-01-30 15:19:39 +07:00
|
|
|
u32 gac_eco;
|
|
|
|
u32 gam_ecochk;
|
|
|
|
u32 gab_ctl;
|
|
|
|
u32 gfx_mode;
|
2014-01-30 15:19:37 +07:00
|
|
|
u32 extra_instdone[I915_NUM_INSTDONE_REG];
|
|
|
|
u64 fence[I915_MAX_NUM_FENCES];
|
|
|
|
struct intel_overlay_error_state *overlay;
|
|
|
|
struct intel_display_error_state *display;
|
2014-06-30 23:53:41 +07:00
|
|
|
struct drm_i915_error_object *semaphore_obj;
|
2014-01-30 15:19:37 +07:00
|
|
|
|
2012-02-15 18:25:37 +07:00
|
|
|
struct drm_i915_error_ring {
|
2014-01-27 20:52:34 +07:00
|
|
|
bool valid;
|
2014-01-30 15:19:38 +07:00
|
|
|
/* Software tracked state */
|
|
|
|
bool waiting;
|
|
|
|
int hangcheck_score;
|
|
|
|
enum intel_ring_hangcheck_action hangcheck_action;
|
|
|
|
int num_requests;
|
|
|
|
|
|
|
|
/* our own tracking of ring head and tail */
|
|
|
|
u32 cpu_ring_head;
|
|
|
|
u32 cpu_ring_tail;
|
|
|
|
|
|
|
|
u32 semaphore_seqno[I915_NUM_RINGS - 1];
|
|
|
|
|
|
|
|
/* Register state */
|
|
|
|
u32 tail;
|
|
|
|
u32 head;
|
|
|
|
u32 ctl;
|
|
|
|
u32 hws;
|
|
|
|
u32 ipeir;
|
|
|
|
u32 ipehr;
|
|
|
|
u32 instdone;
|
|
|
|
u32 bbstate;
|
|
|
|
u32 instpm;
|
|
|
|
u32 instps;
|
|
|
|
u32 seqno;
|
|
|
|
u64 bbaddr;
|
2014-03-21 19:41:53 +07:00
|
|
|
u64 acthd;
|
2014-01-30 15:19:38 +07:00
|
|
|
u32 fault_reg;
|
2014-04-02 06:31:07 +07:00
|
|
|
u64 faddr;
|
2014-01-30 15:19:38 +07:00
|
|
|
u32 rc_psmi; /* sleep state */
|
|
|
|
u32 semaphore_mboxes[I915_NUM_RINGS - 1];
|
|
|
|
|
2012-02-15 18:25:37 +07:00
|
|
|
struct drm_i915_error_object {
|
|
|
|
int page_count;
|
|
|
|
u32 gtt_offset;
|
|
|
|
u32 *pages[0];
|
2014-02-25 22:11:24 +07:00
|
|
|
} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
|
2014-01-30 15:19:38 +07:00
|
|
|
|
2012-02-15 18:25:37 +07:00
|
|
|
struct drm_i915_error_request {
|
|
|
|
long jiffies;
|
|
|
|
u32 seqno;
|
2012-02-15 18:25:38 +07:00
|
|
|
u32 tail;
|
2012-02-15 18:25:37 +07:00
|
|
|
} *requests;
|
2014-01-30 15:19:40 +07:00
|
|
|
|
|
|
|
struct {
|
|
|
|
u32 gfx_mode;
|
|
|
|
union {
|
|
|
|
u64 pdp[4];
|
|
|
|
u32 pp_dir_base;
|
|
|
|
};
|
|
|
|
} vm_info;
|
2014-02-25 22:11:24 +07:00
|
|
|
|
|
|
|
pid_t pid;
|
|
|
|
char comm[TASK_COMM_LEN];
|
2012-02-15 18:25:37 +07:00
|
|
|
} ring[I915_NUM_RINGS];
|
2014-08-13 02:05:47 +07:00
|
|
|
|
2010-02-18 17:24:56 +07:00
|
|
|
struct drm_i915_error_buffer {
|
2011-01-10 04:07:49 +07:00
|
|
|
u32 size;
|
2010-02-18 17:24:56 +07:00
|
|
|
u32 name;
|
2012-07-20 18:41:01 +07:00
|
|
|
u32 rseqno, wseqno;
|
2010-02-18 17:24:56 +07:00
|
|
|
u32 gtt_offset;
|
|
|
|
u32 read_domains;
|
|
|
|
u32 write_domain;
|
2011-10-10 02:52:02 +07:00
|
|
|
s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
|
2010-02-18 17:24:56 +07:00
|
|
|
s32 pinned:2;
|
|
|
|
u32 tiling:2;
|
|
|
|
u32 dirty:1;
|
|
|
|
u32 purgeable:1;
|
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
By exporting the ability to map user address and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forwarding porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer
with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
rearrange error path to destroy the mmu_notifier locklessly.
Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
allocations of the same userptr range - and notice that
struct_mutex was presumed to be held when during creation it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the
hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required
infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to
be used to fix up the kernel thread's current->mm for use with
copy_user().
v22: Use sg_alloc_table_from_pages for that chunky feeling
v23: Export a function for sanity checking dma-buf rather than encode
userptr details elsewhere, and clean up comments based on
suggestions by Bradley.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com>
[danvet: Frob ioctl allocation to pick the next one - will cause a bit
of fuss with create2 apparently, but such are the rules.]
[danvet2: oops, forgot to git add after manual patch application]
[danvet3: Appease sparse.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 20:22:37 +07:00
|
|
|
u32 userptr:1;
|
2012-02-16 17:03:29 +07:00
|
|
|
s32 ring:4;
|
2013-09-25 16:23:19 +07:00
|
|
|
u32 cache_level:3;
|
2013-08-01 07:00:15 +07:00
|
|
|
} **active_bo, **pinned_bo;
|
2014-01-30 15:19:40 +07:00
|
|
|
|
2013-08-01 07:00:15 +07:00
|
|
|
u32 *active_bo_count, *pinned_bo_count;
|
2014-08-13 02:05:47 +07:00
|
|
|
u32 vm_count;
|
2009-06-19 06:56:52 +07:00
|
|
|
};
|
|
|
|
|
2013-11-08 21:48:56 +07:00
|
|
|
struct intel_connector;
|
2014-10-27 21:26:47 +07:00
|
|
|
struct intel_encoder;
|
2013-03-27 06:44:50 +07:00
|
|
|
struct intel_crtc_config;
|
2014-03-07 23:57:48 +07:00
|
|
|
struct intel_plane_config;
|
2013-03-28 16:42:00 +07:00
|
|
|
struct intel_crtc;
|
2013-06-04 03:40:22 +07:00
|
|
|
struct intel_limit;
|
|
|
|
struct dpll;
|
2013-03-27 06:44:50 +07:00
|
|
|
|
2009-09-22 00:42:27 +07:00
|
|
|
struct drm_i915_display_funcs {
|
2010-04-23 22:17:39 +07:00
|
|
|
bool (*fbc_enabled)(struct drm_device *dev);
|
2013-12-12 22:27:40 +07:00
|
|
|
void (*enable_fbc)(struct drm_crtc *crtc);
|
2009-09-22 00:42:27 +07:00
|
|
|
void (*disable_fbc)(struct drm_device *dev);
|
|
|
|
int (*get_display_clock_speed)(struct drm_device *dev);
|
|
|
|
int (*get_fifo_size)(struct drm_device *dev, int plane);
|
2013-06-04 03:40:22 +07:00
|
|
|
/**
|
|
|
|
* find_dpll() - Find the best values for the PLL
|
|
|
|
* @limit: limits for the PLL
|
|
|
|
* @crtc: current CRTC
|
|
|
|
* @target: target frequency in kHz
|
|
|
|
* @refclk: reference clock frequency in kHz
|
|
|
|
* @match_clock: if provided, @best_clock P divider must
|
|
|
|
* match the P divider from @match_clock
|
|
|
|
* used for LVDS downclocking
|
|
|
|
* @best_clock: best PLL values found
|
|
|
|
*
|
|
|
|
* Returns true on success, false on failure.
|
|
|
|
*/
|
|
|
|
bool (*find_dpll)(const struct intel_limit *limit,
|
2014-10-20 17:46:43 +07:00
|
|
|
struct intel_crtc *crtc,
|
2013-06-04 03:40:22 +07:00
|
|
|
int target, int refclk,
|
|
|
|
struct dpll *match_clock,
|
|
|
|
struct dpll *best_clock);
|
2013-09-10 15:40:40 +07:00
|
|
|
void (*update_wm)(struct drm_crtc *crtc);
|
2013-08-07 02:24:11 +07:00
|
|
|
void (*update_sprite_wm)(struct drm_plane *plane,
|
|
|
|
struct drm_crtc *crtc,
|
2014-07-15 14:21:24 +07:00
|
|
|
uint32_t sprite_width, uint32_t sprite_height,
|
|
|
|
int pixel_size, bool enable, bool scaled);
|
2012-10-26 15:58:18 +07:00
|
|
|
void (*modeset_global_resources)(struct drm_device *dev);
|
2013-03-28 16:42:00 +07:00
|
|
|
/* Returns the active state of the crtc, and if the crtc is active,
|
|
|
|
* fills out the pipe-config with the hw state. */
|
|
|
|
bool (*get_pipe_config)(struct intel_crtc *,
|
|
|
|
struct intel_crtc_config *);
|
2014-03-07 23:57:48 +07:00
|
|
|
void (*get_plane_config)(struct intel_crtc *,
|
|
|
|
struct intel_plane_config *);
|
2014-10-29 16:32:33 +07:00
|
|
|
int (*crtc_compute_clock)(struct intel_crtc *crtc);
|
2012-06-30 03:39:33 +07:00
|
|
|
void (*crtc_enable)(struct drm_crtc *crtc);
|
|
|
|
void (*crtc_disable)(struct drm_crtc *crtc);
|
2012-04-20 23:11:53 +07:00
|
|
|
void (*off)(struct drm_crtc *crtc);
|
2014-10-27 21:26:50 +07:00
|
|
|
void (*audio_codec_enable)(struct drm_connector *connector,
|
|
|
|
struct intel_encoder *encoder,
|
|
|
|
struct drm_display_mode *mode);
|
|
|
|
void (*audio_codec_disable)(struct intel_encoder *encoder);
|
2011-04-29 04:27:04 +07:00
|
|
|
void (*fdi_link_train)(struct drm_crtc *crtc);
|
2011-04-29 05:04:31 +07:00
|
|
|
void (*init_clock_gating)(struct drm_device *dev);
|
2011-06-16 23:19:13 +07:00
|
|
|
int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
|
|
|
|
struct drm_framebuffer *fb,
|
2013-07-23 08:49:58 +07:00
|
|
|
struct drm_i915_gem_object *obj,
|
2014-05-22 20:13:33 +07:00
|
|
|
struct intel_engine_cs *ring,
|
2013-07-23 08:49:58 +07:00
|
|
|
uint32_t flags);
|
2014-04-25 04:55:01 +07:00
|
|
|
void (*update_primary_plane)(struct drm_crtc *crtc,
|
|
|
|
struct drm_framebuffer *fb,
|
|
|
|
int x, int y);
|
2012-12-11 20:05:07 +07:00
|
|
|
void (*hpd_irq_setup)(struct drm_device *dev);
|
2009-09-22 00:42:27 +07:00
|
|
|
/* clock updates for mode set */
|
|
|
|
/* cursor updates */
|
|
|
|
/* render clock increase/decrease */
|
|
|
|
/* display clock increase/decrease */
|
|
|
|
/* pll clock increase/decrease */
|
2013-11-08 21:48:56 +07:00
|
|
|
|
2014-11-07 16:16:02 +07:00
|
|
|
int (*setup_backlight)(struct intel_connector *connector, enum pipe pipe);
|
2013-11-08 21:48:56 +07:00
|
|
|
uint32_t (*get_backlight)(struct intel_connector *connector);
|
|
|
|
void (*set_backlight)(struct intel_connector *connector,
|
|
|
|
uint32_t level);
|
|
|
|
void (*disable_backlight)(struct intel_connector *connector);
|
|
|
|
void (*enable_backlight)(struct intel_connector *connector);
|
2009-09-22 00:42:27 +07:00
|
|
|
};
|
|
|
|
|
2013-07-20 02:36:52 +07:00
|
|
|
struct intel_uncore_funcs {
|
2013-11-23 16:25:42 +07:00
|
|
|
void (*force_wake_get)(struct drm_i915_private *dev_priv,
|
|
|
|
int fw_engine);
|
|
|
|
void (*force_wake_put)(struct drm_i915_private *dev_priv,
|
|
|
|
int fw_engine);
|
2013-10-05 11:22:51 +07:00
|
|
|
|
|
|
|
uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv, off_t offset, bool trace);
|
|
|
|
uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, off_t offset, bool trace);
|
|
|
|
uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, off_t offset, bool trace);
|
|
|
|
uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, off_t offset, bool trace);
|
|
|
|
|
|
|
|
void (*mmio_writeb)(struct drm_i915_private *dev_priv, off_t offset,
|
|
|
|
uint8_t val, bool trace);
|
|
|
|
void (*mmio_writew)(struct drm_i915_private *dev_priv, off_t offset,
|
|
|
|
uint16_t val, bool trace);
|
|
|
|
void (*mmio_writel)(struct drm_i915_private *dev_priv, off_t offset,
|
|
|
|
uint32_t val, bool trace);
|
|
|
|
void (*mmio_writeq)(struct drm_i915_private *dev_priv, off_t offset,
|
|
|
|
uint64_t val, bool trace);
|
2012-07-02 21:51:02 +07:00
|
|
|
};
|
|
|
|
|
2013-07-20 02:36:52 +07:00
|
|
|
struct intel_uncore {
|
|
|
|
spinlock_t lock; /** lock is also taken in irq contexts. */
|
|
|
|
|
|
|
|
struct intel_uncore_funcs funcs;
|
|
|
|
|
|
|
|
unsigned fifo_count;
|
|
|
|
unsigned forcewake_count;
|
2013-08-26 19:46:09 +07:00
|
|
|
|
2013-11-23 16:25:43 +07:00
|
|
|
unsigned fw_rendercount;
|
|
|
|
unsigned fw_mediacount;
|
2014-11-05 00:07:04 +07:00
|
|
|
unsigned fw_blittercount;
|
2013-11-23 16:25:43 +07:00
|
|
|
|
2014-03-05 19:00:39 +07:00
|
|
|
struct timer_list force_wake_timer;
|
2013-07-20 02:36:52 +07:00
|
|
|
};
|
|
|
|
|
2013-04-23 22:37:17 +07:00
|
|
|
#define DEV_INFO_FOR_EACH_FLAG(func, sep) \
|
|
|
|
func(is_mobile) sep \
|
|
|
|
func(is_i85x) sep \
|
|
|
|
func(is_i915g) sep \
|
|
|
|
func(is_i945gm) sep \
|
|
|
|
func(is_g33) sep \
|
|
|
|
func(need_gfx_hws) sep \
|
|
|
|
func(is_g4x) sep \
|
|
|
|
func(is_pineview) sep \
|
|
|
|
func(is_broadwater) sep \
|
|
|
|
func(is_crestline) sep \
|
|
|
|
func(is_ivybridge) sep \
|
|
|
|
func(is_valleyview) sep \
|
|
|
|
func(is_haswell) sep \
|
2014-04-02 12:54:50 +07:00
|
|
|
func(is_skylake) sep \
|
2013-08-24 06:00:07 +07:00
|
|
|
func(is_preliminary) sep \
|
2013-04-23 22:37:17 +07:00
|
|
|
func(has_fbc) sep \
|
|
|
|
func(has_pipe_cxsr) sep \
|
|
|
|
func(has_hotplug) sep \
|
|
|
|
func(cursor_needs_physical) sep \
|
|
|
|
func(has_overlay) sep \
|
|
|
|
func(overlay_needs_physical) sep \
|
|
|
|
func(supports_tv) sep \
|
2013-04-23 00:40:39 +07:00
|
|
|
func(has_llc) sep \
|
2013-04-23 00:40:41 +07:00
|
|
|
func(has_ddi) sep \
|
|
|
|
func(has_fpga_dbg)
|
2012-08-09 03:01:51 +07:00
|
|
|
|
2013-04-23 00:40:38 +07:00
|
|
|
#define DEFINE_FLAG(name) u8 name:1
|
|
|
|
#define SEP_SEMICOLON ;
|
2012-08-09 03:01:51 +07:00
|
|
|
|
2009-12-17 03:16:16 +07:00
|
|
|
struct intel_device_info {
|
2013-01-24 20:29:28 +07:00
|
|
|
u32 display_mmio_offset;
|
2014-08-10 01:18:42 +07:00
|
|
|
u16 device_id;
|
2013-03-14 04:05:41 +07:00
|
|
|
u8 num_pipes:3;
|
2014-03-04 00:31:48 +07:00
|
|
|
u8 num_sprites[I915_MAX_PIPES];
|
2010-08-11 15:59:24 +07:00
|
|
|
u8 gen;
|
2013-10-16 00:02:57 +07:00
|
|
|
u8 ring_mask; /* Rings supported by the HW */
|
2013-04-23 00:40:38 +07:00
|
|
|
DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG, SEP_SEMICOLON);
|
drm/i915: Reorganize display pipe register accesses
RFCv2: Reorganize array indexing so that full offsets can be used as
is. It makes grepping for registers in i915_reg.h much easier. Also
move offset arrays to intel_device_info.
v1: Fixed offsets for VLV, proper eDP handling
v2: Fixed BCLRPAT, PIPESRC, PIPECONF and DSP* macros.
v3: Added EDP pipe comment, removed redundant offset arrays for
MSA_MISC and DDI_FUNC_CTL.
v4: Rename patch and report object size increase.
v5: Change location of commas, add PIPE_EDP into enum pipe
v6: Insert PIPE_EDP_OFFSET into pipe offset array
v7: Set I915_MAX_PIPES back to 3, change more registers accessors
to use the new macros, get rid of _PIPE_INC and add dev_priv
as a parameter where required by the new macros.
Upcoming hardware will not have the various display pipe register
ranges evenly spaced in memory. Change register address calculations
into array lookups.
Tested on SNB, VLV, IVB, Gen2 and HSW w/eDP.
I left the UMS cruft untouched.
Size differences:
text data bss dec hex filename
596431 4634 56 601121 92c21 i915.ko (new)
593199 4634 56 597889 91f81 i915.ko (old)
Signed-off-by: Antti Koskipaa <antti.koskipaa@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Tested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-04 19:22:24 +07:00
|
|
|
/* Register offsets for the various display pipes and transcoders */
|
|
|
|
int pipe_offsets[I915_MAX_TRANSCODERS];
|
|
|
|
int trans_offsets[I915_MAX_TRANSCODERS];
|
|
|
|
int palette_offsets[I915_MAX_PIPES];
|
2014-04-09 17:28:53 +07:00
|
|
|
int cursor_offsets[I915_MAX_PIPES];
|
2009-12-17 03:16:16 +07:00
|
|
|
};
|
|
|
|
|
2013-04-23 00:40:38 +07:00
|
|
|
#undef DEFINE_FLAG
|
|
|
|
#undef SEP_SEMICOLON
|
|
|
|
|
2013-01-25 05:44:55 +07:00
|
|
|
enum i915_cache_level {
|
|
|
|
I915_CACHE_NONE = 0,
|
2013-08-06 19:17:02 +07:00
|
|
|
I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */
|
|
|
|
I915_CACHE_L3_LLC, /* gen7+, L3 sits between the domain specific
|
|
|
|
caches, eg sampler/render caches, and the
|
|
|
|
large Last-Level-Cache. LLC is coherent with
|
|
|
|
the CPU, but L3 is only visible to the GPU. */
|
2013-08-08 20:41:10 +07:00
|
|
|
I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
|
2013-01-25 05:44:55 +07:00
|
|
|
};
|
|
|
|
|
2013-06-12 16:35:28 +07:00
|
|
|
struct i915_ctx_hang_stats {
|
|
|
|
/* This context had batch pending when hang was declared */
|
|
|
|
unsigned batch_pending;
|
|
|
|
|
|
|
|
/* This context had batch active when hang was declared */
|
|
|
|
unsigned batch_active;
|
2013-08-30 20:19:28 +07:00
|
|
|
|
|
|
|
/* Time when this context was last blamed for a GPU reset */
|
|
|
|
unsigned long guilty_ts;
|
|
|
|
|
|
|
|
/* This context is banned to submit more work */
|
|
|
|
bool banned;
|
2013-06-12 16:35:28 +07:00
|
|
|
};
|
2012-06-05 04:42:43 +07:00
|
|
|
|
|
|
|
/* This must match up with the value previously used for execbuf2.rsvd1. */
|
drm/i915: Emphasize that ctx->id is merely a user handle
This is an Execlists preparatory patch, since they make context ID become an
overloaded term:
- In the software, it was used to distinguish which context userspace was
trying to use.
- In the BSpec, the term is used to describe the 20-bits long field the
hardware uses to it to discriminate the contexts that are submitted to
the ELSP and inform the driver about their current status (via Context
Switch Interrupts and Context Status Buffers).
Initially, I tried to make the different meanings converge, but it proved
impossible:
- The software ctx->id is per-filp, while the hardware one needs to be
globally unique.
- Also, we multiplex several backing states objects per intel_context,
and all of them need unique HW IDs.
- I tried adding a per-filp ID and then composing the HW context ID as:
ctx->id + file_priv->id + ring->id, but the fact that the hardware only
uses 20-bits means we have to artificially limit the number of filps or
contexts the userspace can create.
The ctx->user_handle renaming bits are done with this Cocci patch (plus
manual frobbing of the struct declaration):
@@
struct intel_context c;
@@
- (c).id
+ c.user_handle
@@
struct intel_context *c;
@@
- (c)->id
+ c->user_handle
Also, while we are at it, s/DEFAULT_CONTEXT_ID/DEFAULT_CONTEXT_HANDLE and
change the type to unsigned 32 bits.
v2: s/handle/user_handle and change the type to uint32_t as suggested by
Chris Wilson.
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> (v1)
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-07-03 22:28:00 +07:00
|
|
|
#define DEFAULT_CONTEXT_HANDLE 0
|
2014-07-03 22:28:01 +07:00
|
|
|
/**
|
|
|
|
* struct intel_context - as the name implies, represents a context.
|
|
|
|
* @ref: reference count.
|
|
|
|
* @user_handle: userspace tracking identity for this context.
|
|
|
|
* @remap_slice: l3 row remapping information.
|
|
|
|
* @file_priv: filp associated with this context (NULL for global default
|
|
|
|
* context).
|
|
|
|
* @hang_stats: information about the role of this context in possible GPU
|
|
|
|
* hangs.
|
|
|
|
* @ppgtt: virtual memory space used by this context.
|
|
|
|
* @legacy_hw_ctx: render context backing object and whether it is correctly
|
|
|
|
* initialized (legacy ring submission mechanism only).
|
|
|
|
* @link: link in the global list of contexts.
|
|
|
|
*
|
|
|
|
* Contexts are memory images used by the hardware to store copies of their
|
|
|
|
* internal state.
|
|
|
|
*/
|
2014-05-22 20:13:37 +07:00
|
|
|
struct intel_context {
|
2013-04-30 17:30:33 +07:00
|
|
|
struct kref ref;
|
drm/i915: Emphasize that ctx->id is merely a user handle
This is an Execlists preparatory patch, since they make context ID become an
overloaded term:
- In the software, it was used to distinguish which context userspace was
trying to use.
- In the BSpec, the term is used to describe the 20-bits long field the
hardware uses to it to discriminate the contexts that are submitted to
the ELSP and inform the driver about their current status (via Context
Switch Interrupts and Context Status Buffers).
Initially, I tried to make the different meanings converge, but it proved
impossible:
- The software ctx->id is per-filp, while the hardware one needs to be
globally unique.
- Also, we multiplex several backing states objects per intel_context,
and all of them need unique HW IDs.
- I tried adding a per-filp ID and then composing the HW context ID as:
ctx->id + file_priv->id + ring->id, but the fact that the hardware only
uses 20-bits means we have to artificially limit the number of filps or
contexts the userspace can create.
The ctx->user_handle renaming bits are done with this Cocci patch (plus
manual frobbing of the struct declaration):
@@
struct intel_context c;
@@
- (c).id
+ c.user_handle
@@
struct intel_context *c;
@@
- (c)->id
+ c->user_handle
Also, while we are at it, s/DEFAULT_CONTEXT_ID/DEFAULT_CONTEXT_HANDLE and
change the type to unsigned 32 bits.
v2: s/handle/user_handle and change the type to uint32_t as suggested by
Chris Wilson.
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> (v1)
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-07-03 22:28:00 +07:00
|
|
|
int user_handle;
|
drm/i915: Do remaps for all contexts
On both Ivybridge and Haswell, row remapping information is saved and
restored with context. This means, we never actually properly supported
the l3 remapping because our sysfs interface is asynchronous (and not
tied to any context), and the known faulty HW would be reused by the
next context to run.
Not that due to the asynchronous nature of the sysfs entry, there is no
point modifying the registers for the existing context. Instead we set a
flag for all contexts to load the correct remapping information on the
next run. Interested clients can use debugfs to determine whether or not
the row has been remapped.
One could propose at this point that we just do the remapping in the
kernel. I guess since we have to maintain the sysfs interface anyway,
I'm not sure how useful it is, and I do like keeping the policy in
userspace; (it wasn't my original decision to make the
interface the way it is, so I'm not attached).
v2: Force a context switch when we have a remap on the next switch.
(Ville)
Don't let userspace use the interface with disabled contexts.
v3: Don't force a context switch, just let it nop
Improper context slice remap initialization, 1<<1 instead of 1<<i, but I
rewrote it to avoid a second round of confusion.
Error print moved to error path (All Ville)
Added a comment on why the slice remap initialization happens.
CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-19 09:03:18 +07:00
|
|
|
uint8_t remap_slice;
|
2012-06-05 04:42:43 +07:00
|
|
|
struct drm_i915_file_private *file_priv;
|
2013-06-12 16:35:28 +07:00
|
|
|
struct i915_ctx_hang_stats hang_stats;
|
2014-08-06 20:04:53 +07:00
|
|
|
struct i915_hw_ppgtt *ppgtt;
|
2013-09-18 11:12:45 +07:00
|
|
|
|
drm/i915/bdw: Introduce one context backing object per engine
A context backing object only makes sense for a given engine (because
it holds state data specific to that engine).
In legacy ringbuffer sumission mode, the only MI_SET_CONTEXT we really
perform is for the render engine, so one backing object is all we nee.
With Execlists, however, we need backing objects for every engine, as
contexts become the only way to submit workloads to the GPU. To tackle
this problem, we multiplex the context struct to contain <no-of-engines>
objects.
Originally, I colored this code by instantiating one new context for
every engine I wanted to use, but this change suggested by Brad Volkin
makes it more elegant.
v2: Leave the old backing object pointer behind. Daniel Vetter suggested
using a union, but it makes more sense to keep rcs_state as a NULL
pointer behind, to make sure no one uses it incorrectly when Execlists
are enabled, similar to what he suggested for ring->buffer (Rusty's API
level 5).
v3: Use the name "state" instead of the too-generic "obj", so that it
mirrors the name choice for the legacy rcs_state.
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-07-24 23:04:13 +07:00
|
|
|
/* Legacy ring buffer submission */
|
2014-07-03 22:27:59 +07:00
|
|
|
struct {
|
|
|
|
struct drm_i915_gem_object *rcs_state;
|
|
|
|
bool initialized;
|
|
|
|
} legacy_hw_ctx;
|
|
|
|
|
drm/i915/bdw: Introduce one context backing object per engine
A context backing object only makes sense for a given engine (because
it holds state data specific to that engine).
In legacy ringbuffer sumission mode, the only MI_SET_CONTEXT we really
perform is for the render engine, so one backing object is all we nee.
With Execlists, however, we need backing objects for every engine, as
contexts become the only way to submit workloads to the GPU. To tackle
this problem, we multiplex the context struct to contain <no-of-engines>
objects.
Originally, I colored this code by instantiating one new context for
every engine I wanted to use, but this change suggested by Brad Volkin
makes it more elegant.
v2: Leave the old backing object pointer behind. Daniel Vetter suggested
using a union, but it makes more sense to keep rcs_state as a NULL
pointer behind, to make sure no one uses it incorrectly when Execlists
are enabled, similar to what he suggested for ring->buffer (Rusty's API
level 5).
v3: Use the name "state" instead of the too-generic "obj", so that it
mirrors the name choice for the legacy rcs_state.
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-07-24 23:04:13 +07:00
|
|
|
/* Execlists */
|
2014-08-21 17:40:54 +07:00
|
|
|
bool rcs_initialized;
|
drm/i915/bdw: Introduce one context backing object per engine
A context backing object only makes sense for a given engine (because
it holds state data specific to that engine).
In legacy ringbuffer sumission mode, the only MI_SET_CONTEXT we really
perform is for the render engine, so one backing object is all we nee.
With Execlists, however, we need backing objects for every engine, as
contexts become the only way to submit workloads to the GPU. To tackle
this problem, we multiplex the context struct to contain <no-of-engines>
objects.
Originally, I colored this code by instantiating one new context for
every engine I wanted to use, but this change suggested by Brad Volkin
makes it more elegant.
v2: Leave the old backing object pointer behind. Daniel Vetter suggested
using a union, but it makes more sense to keep rcs_state as a NULL
pointer behind, to make sure no one uses it incorrectly when Execlists
are enabled, similar to what he suggested for ring->buffer (Rusty's API
level 5).
v3: Use the name "state" instead of the too-generic "obj", so that it
mirrors the name choice for the legacy rcs_state.
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-07-24 23:04:13 +07:00
|
|
|
struct {
|
|
|
|
struct drm_i915_gem_object *state;
|
2014-07-24 23:04:15 +07:00
|
|
|
struct intel_ringbuffer *ringbuf;
|
drm/i915/bdw: Pin the context backing objects to GGTT on-demand
Up until now, we have pinned every logical ring context backing object
during creation, and left it pinned until destruction. This made my life
easier, but it's a harmful thing to do, because we cause fragmentation
of the GGTT (and, eventually, we would run out of space).
This patch makes the pinning on-demand: the backing objects of the two
contexts that are written to the ELSP are pinned right before submission
and unpinned once the hardware is done with them. The only context that
is still pinned regardless is the global default one, so that the HWS can
still be accessed in the same way (ring->status_page).
v2: In the early version of this patch, we were pinning the context as
we put it into the ELSP: on the one hand, this is very efficient because
only a maximum of two contexts are pinned at any given time, but on the other
hand, we cannot really pin in interrupt time :(
v3: Use a mutex rather than atomic_t to protect pin count to avoid races.
Do not unpin default context in free_request.
v4: Break out pin and unpin into functions. Fix style problems reported
by checkpatch
v5: Remove unpin_lock as all pinning and unpinning is done with the struct
mutex already locked. Add WARN_ONs to make sure this is the case in future.
Issue: VIZ-4277
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
Reviewed-by: Akash Goel <akash.goels@gmail.com>
Reviewed-by: Deepak S<deepak.s@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-11-13 17:28:10 +07:00
|
|
|
int unpin_count;
|
drm/i915/bdw: Introduce one context backing object per engine
A context backing object only makes sense for a given engine (because
it holds state data specific to that engine).
In legacy ringbuffer submission mode, the only MI_SET_CONTEXT we really
perform is for the render engine, so one backing object is all we need.
With Execlists, however, we need backing objects for every engine, as
contexts become the only way to submit workloads to the GPU. To tackle
this problem, we multiplex the context struct to contain <no-of-engines>
objects.
Originally, I colored this code by instantiating one new context for
every engine I wanted to use, but this change suggested by Brad Volkin
makes it more elegant.
v2: Leave the old backing object pointer behind. Daniel Vetter suggested
using a union, but it makes more sense to keep rcs_state as a NULL
pointer behind, to make sure no one uses it incorrectly when Execlists
are enabled, similar to what he suggested for ring->buffer (Rusty's API
level 5).
v3: Use the name "state" instead of the too-generic "obj", so that it
mirrors the name choice for the legacy rcs_state.
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-07-24 23:04:13 +07:00
|
|
|
} engine[I915_NUM_RINGS];
|
|
|
|
|
2013-09-18 11:12:45 +07:00
|
|
|
struct list_head link;
|
2012-06-05 04:42:43 +07:00
|
|
|
};
|
|
|
|
|
2013-06-28 06:30:21 +07:00
|
|
|
struct i915_fbc {
|
|
|
|
unsigned long size;
|
2014-07-01 00:41:24 +07:00
|
|
|
unsigned threshold;
|
2013-06-28 06:30:21 +07:00
|
|
|
unsigned int fb_id;
|
|
|
|
enum plane plane;
|
|
|
|
int y;
|
|
|
|
|
2014-06-20 02:06:10 +07:00
|
|
|
struct drm_mm_node compressed_fb;
|
2013-06-28 06:30:21 +07:00
|
|
|
struct drm_mm_node *compressed_llb;
|
|
|
|
|
2014-08-01 16:04:45 +07:00
|
|
|
bool false_color;
|
|
|
|
|
2014-09-20 02:04:55 +07:00
|
|
|
/* Tracks whether the HW is actually enabled, not whether the feature is
|
|
|
|
* possible. */
|
|
|
|
bool enabled;
|
|
|
|
|
2014-09-25 06:50:59 +07:00
|
|
|
/* On gen8 some rings cannot perform fbc clean operation so for now
|
|
|
|
* we are doing this on SW with mmio.
|
|
|
|
* This variable works in the opposite information direction
|
|
|
|
* of ring->fbc_dirty telling software on frontbuffer tracking
|
|
|
|
* to perform the cache clean on sw side.
|
|
|
|
*/
|
|
|
|
bool need_sw_cache_clean;
|
|
|
|
|
2013-06-28 06:30:21 +07:00
|
|
|
struct intel_fbc_work {
|
|
|
|
struct delayed_work work;
|
|
|
|
struct drm_crtc *crtc;
|
|
|
|
struct drm_framebuffer *fb;
|
|
|
|
} *fbc_work;
|
|
|
|
|
2013-07-27 23:23:55 +07:00
|
|
|
enum no_fbc_reason {
|
|
|
|
FBC_OK, /* FBC is enabled */
|
|
|
|
FBC_UNSUPPORTED, /* FBC is not supported by this chipset */
|
2013-06-28 06:30:21 +07:00
|
|
|
FBC_NO_OUTPUT, /* no outputs enabled to compress */
|
|
|
|
FBC_STOLEN_TOO_SMALL, /* not enough space for buffers */
|
|
|
|
FBC_UNSUPPORTED_MODE, /* interlace or doublescanned mode */
|
|
|
|
FBC_MODE_TOO_LARGE, /* mode too large for compression */
|
|
|
|
FBC_BAD_PLANE, /* fbc not supported on plane */
|
|
|
|
FBC_NOT_TILED, /* buffer not tiled */
|
|
|
|
FBC_MULTIPLE_PIPES, /* more than one pipe active */
|
|
|
|
FBC_MODULE_PARAM,
|
|
|
|
FBC_CHIP_DEFAULT, /* disabled by default on this chip */
|
|
|
|
} no_fbc_reason;
|
2010-02-06 03:42:41 +07:00
|
|
|
};
|
|
|
|
|
2014-04-05 13:43:28 +07:00
|
|
|
struct i915_drrs {
|
|
|
|
struct intel_connector *connector;
|
|
|
|
};
|
|
|
|
|
2014-07-12 00:30:11 +07:00
|
|
|
struct intel_dp;
|
2013-10-04 02:15:06 +07:00
|
|
|
struct i915_psr {
|
2014-07-12 00:30:15 +07:00
|
|
|
struct mutex lock;
|
2013-10-04 02:15:06 +07:00
|
|
|
bool sink_support;
|
|
|
|
bool source_ok;
|
2014-07-12 00:30:11 +07:00
|
|
|
struct intel_dp *enabled;
|
2014-06-13 19:10:03 +07:00
|
|
|
bool active;
|
|
|
|
struct delayed_work work;
|
2014-07-12 00:30:16 +07:00
|
|
|
unsigned busy_frontbuffer_bits;
|
2013-07-12 04:45:00 +07:00
|
|
|
};
|
2013-06-28 06:30:21 +07:00
|
|
|
|
2010-04-07 15:15:53 +07:00
|
|
|
enum intel_pch {
|
2012-07-04 04:48:16 +07:00
|
|
|
PCH_NONE = 0, /* No PCH present */
|
2010-04-07 15:15:53 +07:00
|
|
|
PCH_IBX, /* Ibexpeak PCH */
|
|
|
|
PCH_CPT, /* Cougarpoint PCH */
|
2012-03-29 22:32:20 +07:00
|
|
|
PCH_LPT, /* Lynxpoint PCH */
|
2014-04-09 12:38:57 +07:00
|
|
|
PCH_SPT, /* Sunrisepoint PCH */
|
2013-04-06 03:12:40 +07:00
|
|
|
PCH_NOP,
|
2010-04-07 15:15:53 +07:00
|
|
|
};
|
|
|
|
|
2012-12-01 21:04:24 +07:00
|
|
|
enum intel_sbi_destination {
|
|
|
|
SBI_ICLK,
|
|
|
|
SBI_MPHY,
|
|
|
|
};
|
|
|
|
|
2010-07-20 03:53:12 +07:00
|
|
|
#define QUIRK_PIPEA_FORCE (1<<0)
|
2011-07-13 04:56:22 +07:00
|
|
|
#define QUIRK_LVDS_SSC_DISABLE (1<<1)
|
2012-03-15 21:56:26 +07:00
|
|
|
#define QUIRK_INVERT_BRIGHTNESS (1<<2)
|
2014-07-04 06:27:50 +07:00
|
|
|
#define QUIRK_BACKLIGHT_PRESENT (1<<3)
|
2014-08-15 05:22:07 +07:00
|
|
|
#define QUIRK_PIPEB_FORCE (1<<4)
|
2014-11-20 15:26:30 +07:00
|
|
|
#define QUIRK_PIN_SWIZZLED_PAGES (1<<5)
|
2010-07-20 03:53:12 +07:00
|
|
|
|
2010-03-30 12:34:14 +07:00
|
|
|
struct intel_fbdev;
|
2011-07-08 18:22:42 +07:00
|
|
|
struct intel_fbc_work;
|
2010-03-30 12:34:13 +07:00
|
|
|
|
2012-02-15 04:37:19 +07:00
|
|
|
struct intel_gmbus {
|
|
|
|
struct i2c_adapter adapter;
|
2012-11-10 22:58:21 +07:00
|
|
|
u32 force_bit;
|
2012-02-15 04:37:19 +07:00
|
|
|
u32 reg0;
|
2012-02-15 04:37:22 +07:00
|
|
|
u32 gpio_reg;
|
2012-02-28 06:43:09 +07:00
|
|
|
struct i2c_algo_bit_data bit_algo;
|
2012-02-15 04:37:19 +07:00
|
|
|
struct drm_i915_private *dev_priv;
|
|
|
|
};
|
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
struct i915_suspend_saved_registers {
|
2007-11-22 11:14:14 +07:00
|
|
|
u8 saveLBB;
|
|
|
|
u32 saveDSPACNTR;
|
|
|
|
u32 saveDSPBCNTR;
|
2008-05-07 09:27:53 +07:00
|
|
|
u32 saveDSPARB;
|
2007-11-22 11:14:14 +07:00
|
|
|
u32 savePIPEACONF;
|
|
|
|
u32 savePIPEBCONF;
|
|
|
|
u32 savePIPEASRC;
|
|
|
|
u32 savePIPEBSRC;
|
|
|
|
u32 saveFPA0;
|
|
|
|
u32 saveFPA1;
|
|
|
|
u32 saveDPLL_A;
|
|
|
|
u32 saveDPLL_A_MD;
|
|
|
|
u32 saveHTOTAL_A;
|
|
|
|
u32 saveHBLANK_A;
|
|
|
|
u32 saveHSYNC_A;
|
|
|
|
u32 saveVTOTAL_A;
|
|
|
|
u32 saveVBLANK_A;
|
|
|
|
u32 saveVSYNC_A;
|
|
|
|
u32 saveBCLRPAT_A;
|
2009-11-06 09:13:02 +07:00
|
|
|
u32 saveTRANSACONF;
|
2009-10-21 14:27:01 +07:00
|
|
|
u32 saveTRANS_HTOTAL_A;
|
|
|
|
u32 saveTRANS_HBLANK_A;
|
|
|
|
u32 saveTRANS_HSYNC_A;
|
|
|
|
u32 saveTRANS_VTOTAL_A;
|
|
|
|
u32 saveTRANS_VBLANK_A;
|
|
|
|
u32 saveTRANS_VSYNC_A;
|
2008-02-20 06:39:58 +07:00
|
|
|
u32 savePIPEASTAT;
|
2007-11-22 11:14:14 +07:00
|
|
|
u32 saveDSPASTRIDE;
|
|
|
|
u32 saveDSPASIZE;
|
|
|
|
u32 saveDSPAPOS;
|
2008-07-30 01:54:06 +07:00
|
|
|
u32 saveDSPAADDR;
|
2007-11-22 11:14:14 +07:00
|
|
|
u32 saveDSPASURF;
|
|
|
|
u32 saveDSPATILEOFF;
|
|
|
|
u32 savePFIT_PGM_RATIOS;
|
2009-10-15 02:33:41 +07:00
|
|
|
u32 saveBLC_HIST_CTL;
|
2007-11-22 11:14:14 +07:00
|
|
|
u32 saveBLC_PWM_CTL;
|
|
|
|
u32 saveBLC_PWM_CTL2;
|
2009-10-21 14:27:01 +07:00
|
|
|
u32 saveBLC_CPU_PWM_CTL;
|
|
|
|
u32 saveBLC_CPU_PWM_CTL2;
|
2007-11-22 11:14:14 +07:00
|
|
|
u32 saveFPB0;
|
|
|
|
u32 saveFPB1;
|
|
|
|
u32 saveDPLL_B;
|
|
|
|
u32 saveDPLL_B_MD;
|
|
|
|
u32 saveHTOTAL_B;
|
|
|
|
u32 saveHBLANK_B;
|
|
|
|
u32 saveHSYNC_B;
|
|
|
|
u32 saveVTOTAL_B;
|
|
|
|
u32 saveVBLANK_B;
|
|
|
|
u32 saveVSYNC_B;
|
|
|
|
u32 saveBCLRPAT_B;
|
2009-11-06 09:13:02 +07:00
|
|
|
u32 saveTRANSBCONF;
|
2009-10-21 14:27:01 +07:00
|
|
|
u32 saveTRANS_HTOTAL_B;
|
|
|
|
u32 saveTRANS_HBLANK_B;
|
|
|
|
u32 saveTRANS_HSYNC_B;
|
|
|
|
u32 saveTRANS_VTOTAL_B;
|
|
|
|
u32 saveTRANS_VBLANK_B;
|
|
|
|
u32 saveTRANS_VSYNC_B;
|
2008-02-20 06:39:58 +07:00
|
|
|
u32 savePIPEBSTAT;
|
2007-11-22 11:14:14 +07:00
|
|
|
u32 saveDSPBSTRIDE;
|
|
|
|
u32 saveDSPBSIZE;
|
|
|
|
u32 saveDSPBPOS;
|
2008-07-30 01:54:06 +07:00
|
|
|
u32 saveDSPBADDR;
|
2007-11-22 11:14:14 +07:00
|
|
|
u32 saveDSPBSURF;
|
|
|
|
u32 saveDSPBTILEOFF;
|
2008-07-30 01:54:06 +07:00
|
|
|
u32 saveVGA0;
|
|
|
|
u32 saveVGA1;
|
|
|
|
u32 saveVGA_PD;
|
2007-11-22 11:14:14 +07:00
|
|
|
u32 saveVGACNTRL;
|
|
|
|
u32 saveADPA;
|
|
|
|
u32 saveLVDS;
|
2008-07-30 01:54:06 +07:00
|
|
|
u32 savePP_ON_DELAYS;
|
|
|
|
u32 savePP_OFF_DELAYS;
|
2007-11-22 11:14:14 +07:00
|
|
|
u32 saveDVOA;
|
|
|
|
u32 saveDVOB;
|
|
|
|
u32 saveDVOC;
|
|
|
|
u32 savePP_ON;
|
|
|
|
u32 savePP_OFF;
|
|
|
|
u32 savePP_CONTROL;
|
2008-07-30 01:54:06 +07:00
|
|
|
u32 savePP_DIVISOR;
|
2007-11-22 11:14:14 +07:00
|
|
|
u32 savePFIT_CONTROL;
|
|
|
|
u32 save_palette_a[256];
|
|
|
|
u32 save_palette_b[256];
|
|
|
|
u32 saveFBC_CONTROL;
|
2008-02-20 06:39:58 +07:00
|
|
|
u32 saveIER;
|
|
|
|
u32 saveIIR;
|
|
|
|
u32 saveIMR;
|
2009-10-21 14:27:01 +07:00
|
|
|
u32 saveDEIER;
|
|
|
|
u32 saveDEIMR;
|
|
|
|
u32 saveGTIER;
|
|
|
|
u32 saveGTIMR;
|
|
|
|
u32 saveFDI_RXA_IMR;
|
|
|
|
u32 saveFDI_RXB_IMR;
|
2008-02-17 10:19:29 +07:00
|
|
|
u32 saveCACHE_MODE_0;
|
|
|
|
u32 saveMI_ARB_STATE;
|
2007-11-22 11:14:14 +07:00
|
|
|
u32 saveSWF0[16];
|
|
|
|
u32 saveSWF1[16];
|
|
|
|
u32 saveSWF2[3];
|
|
|
|
u8 saveMSR;
|
|
|
|
u8 saveSR[8];
|
2008-02-08 02:15:20 +07:00
|
|
|
u8 saveGR[25];
|
2007-11-22 11:14:14 +07:00
|
|
|
u8 saveAR_INDEX;
|
2008-05-07 09:25:46 +07:00
|
|
|
u8 saveAR[21];
|
2007-11-22 11:14:14 +07:00
|
|
|
u8 saveDACMASK;
|
2008-05-07 09:25:46 +07:00
|
|
|
u8 saveCR[37];
|
2011-10-10 02:52:02 +07:00
|
|
|
uint64_t saveFENCE[I915_MAX_NUM_FENCES];
|
2009-06-03 14:26:58 +07:00
|
|
|
u32 saveCURACNTR;
|
|
|
|
u32 saveCURAPOS;
|
|
|
|
u32 saveCURABASE;
|
|
|
|
u32 saveCURBCNTR;
|
|
|
|
u32 saveCURBPOS;
|
|
|
|
u32 saveCURBBASE;
|
|
|
|
u32 saveCURSIZE;
|
2009-04-08 06:16:42 +07:00
|
|
|
u32 saveDP_B;
|
|
|
|
u32 saveDP_C;
|
|
|
|
u32 saveDP_D;
|
|
|
|
u32 savePIPEA_GMCH_DATA_M;
|
|
|
|
u32 savePIPEB_GMCH_DATA_M;
|
|
|
|
u32 savePIPEA_GMCH_DATA_N;
|
|
|
|
u32 savePIPEB_GMCH_DATA_N;
|
|
|
|
u32 savePIPEA_DP_LINK_M;
|
|
|
|
u32 savePIPEB_DP_LINK_M;
|
|
|
|
u32 savePIPEA_DP_LINK_N;
|
|
|
|
u32 savePIPEB_DP_LINK_N;
|
2009-10-21 14:27:01 +07:00
|
|
|
u32 saveFDI_RXA_CTL;
|
|
|
|
u32 saveFDI_TXA_CTL;
|
|
|
|
u32 saveFDI_RXB_CTL;
|
|
|
|
u32 saveFDI_TXB_CTL;
|
|
|
|
u32 savePFA_CTL_1;
|
|
|
|
u32 savePFB_CTL_1;
|
|
|
|
u32 savePFA_WIN_SZ;
|
|
|
|
u32 savePFB_WIN_SZ;
|
|
|
|
u32 savePFA_WIN_POS;
|
|
|
|
u32 savePFB_WIN_POS;
|
2009-11-06 09:13:02 +07:00
|
|
|
u32 savePCH_DREF_CONTROL;
|
|
|
|
u32 saveDISP_ARB_CTL;
|
|
|
|
u32 savePIPEA_DATA_M1;
|
|
|
|
u32 savePIPEA_DATA_N1;
|
|
|
|
u32 savePIPEA_LINK_M1;
|
|
|
|
u32 savePIPEA_LINK_N1;
|
|
|
|
u32 savePIPEB_DATA_M1;
|
|
|
|
u32 savePIPEB_DATA_N1;
|
|
|
|
u32 savePIPEB_LINK_M1;
|
|
|
|
u32 savePIPEB_LINK_N1;
|
2010-02-03 01:30:47 +07:00
|
|
|
u32 saveMCHBAR_RENDER_STANDBY;
|
2011-07-27 03:53:06 +07:00
|
|
|
u32 savePCH_PORT_HOTPLUG;
|
2012-11-03 01:55:02 +07:00
|
|
|
};
|
2012-11-03 01:55:03 +07:00
|
|
|
|
2014-05-05 19:19:56 +07:00
|
|
|
struct vlv_s0ix_state {
|
|
|
|
/* GAM */
|
|
|
|
u32 wr_watermark;
|
|
|
|
u32 gfx_prio_ctrl;
|
|
|
|
u32 arb_mode;
|
|
|
|
u32 gfx_pend_tlb0;
|
|
|
|
u32 gfx_pend_tlb1;
|
|
|
|
u32 lra_limits[GEN7_LRA_LIMITS_REG_NUM];
|
|
|
|
u32 media_max_req_count;
|
|
|
|
u32 gfx_max_req_count;
|
|
|
|
u32 render_hwsp;
|
|
|
|
u32 ecochk;
|
|
|
|
u32 bsd_hwsp;
|
|
|
|
u32 blt_hwsp;
|
|
|
|
u32 tlb_rd_addr;
|
|
|
|
|
|
|
|
/* MBC */
|
|
|
|
u32 g3dctl;
|
|
|
|
u32 gsckgctl;
|
|
|
|
u32 mbctl;
|
|
|
|
|
|
|
|
/* GCP */
|
|
|
|
u32 ucgctl1;
|
|
|
|
u32 ucgctl3;
|
|
|
|
u32 rcgctl1;
|
|
|
|
u32 rcgctl2;
|
|
|
|
u32 rstctl;
|
|
|
|
u32 misccpctl;
|
|
|
|
|
|
|
|
/* GPM */
|
|
|
|
u32 gfxpause;
|
|
|
|
u32 rpdeuhwtc;
|
|
|
|
u32 rpdeuc;
|
|
|
|
u32 ecobus;
|
|
|
|
u32 pwrdwnupctl;
|
|
|
|
u32 rp_down_timeout;
|
|
|
|
u32 rp_deucsw;
|
|
|
|
u32 rcubmabdtmr;
|
|
|
|
u32 rcedata;
|
|
|
|
u32 spare2gh;
|
|
|
|
|
|
|
|
/* Display 1 CZ domain */
|
|
|
|
u32 gt_imr;
|
|
|
|
u32 gt_ier;
|
|
|
|
u32 pm_imr;
|
|
|
|
u32 pm_ier;
|
|
|
|
u32 gt_scratch[GEN7_GT_SCRATCH_REG_NUM];
|
|
|
|
|
|
|
|
/* GT SA CZ domain */
|
|
|
|
u32 tilectl;
|
|
|
|
u32 gt_fifoctl;
|
|
|
|
u32 gtlc_wake_ctrl;
|
|
|
|
u32 gtlc_survive;
|
|
|
|
u32 pmwgicz;
|
|
|
|
|
|
|
|
/* Display 2 CZ domain */
|
|
|
|
u32 gu_ctl0;
|
|
|
|
u32 gu_ctl1;
|
|
|
|
u32 clock_gate_dis2;
|
|
|
|
};
|
|
|
|
|
2014-07-11 02:31:18 +07:00
|
|
|
struct intel_rps_ei {
|
|
|
|
u32 cz_clock;
|
|
|
|
u32 render_c0;
|
|
|
|
u32 media_c0;
|
2014-07-04 04:33:01 +07:00
|
|
|
};
|
|
|
|
|
2012-11-03 01:55:03 +07:00
|
|
|
struct intel_gen6_power_mgmt {
|
drm/i915: sanitize rps irq disabling
When disabling the RPS interrupts there is a tricky dependency between
the thread disabling the interrupts, the RPS interrupt handler and the
corresponding RPS work. The RPS work can reenable the interrupts, so
there is no straightforward order in the disabling thread to (1) make
sure that any RPS work is flushed and to (2) disable all RPS
interrupts. Currently this is solved by masking the interrupts using two
separate mask registers (first level display IMR and PM IMR) and doing
the disabling when all first level interrupts are disabled.
This works, but the requirement to run with all first level interrupts
disabled is unnecessary, making the suspend / unload time ordering of RPS
disabling wrt. other uninitialization steps difficult and error prone.
Removing this restriction allows us to disable RPS early during suspend
/ unload and forget about it for the rest of the sequence. By adding a
more explicit method for avoiding the above race, it also becomes easier
to prove its correctness. Finally currently we can hit the WARN in
snb_update_pm_irq(), when a final RPS work runs with the first level
interrupts already disabled. This won't lead to any problem (due to the
separate interrupt masks), but with the change in this and the next
patch we can get rid of the WARN, while leaving it in place for other
scenarios.
To address the above points, add a new RPS interrupts_enabled flag and
use this during RPS disabling to avoid requeuing the RPS work and
reenabling of the RPS interrupts. Since the interrupt disabling happens
now in intel_suspend_gt_powersave(), we will disable RPS interrupts
explicitly during suspend (and not just through the first level mask),
but there is no problem doing so, it's also more consistent and allows
us to unify more of the RPS disabling during suspend and unload time in
the next patch.
v2/v3:
- rebase on patch "drm/i915: move rps irq disable one level up" in the
patchset
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-11-19 20:30:04 +07:00
|
|
|
/*
|
|
|
|
* work, interrupts_enabled and pm_iir are protected by
|
|
|
|
* dev_priv->irq_lock
|
|
|
|
*/
|
2012-11-03 01:55:03 +07:00
|
|
|
struct work_struct work;
|
drm/i915: sanitize rps irq disabling
When disabling the RPS interrupts there is a tricky dependency between
the thread disabling the interrupts, the RPS interrupt handler and the
corresponding RPS work. The RPS work can reenable the interrupts, so
there is no straightforward order in the disabling thread to (1) make
sure that any RPS work is flushed and to (2) disable all RPS
interrupts. Currently this is solved by masking the interrupts using two
separate mask registers (first level display IMR and PM IMR) and doing
the disabling when all first level interrupts are disabled.
This works, but the requirement to run with all first level interrupts
disabled is unnecessary, making the suspend / unload time ordering of RPS
disabling wrt. other uninitialization steps difficult and error prone.
Removing this restriction allows us to disable RPS early during suspend
/ unload and forget about it for the rest of the sequence. By adding a
more explicit method for avoiding the above race, it also becomes easier
to prove its correctness. Finally currently we can hit the WARN in
snb_update_pm_irq(), when a final RPS work runs with the first level
interrupts already disabled. This won't lead to any problem (due to the
separate interrupt masks), but with the change in this and the next
patch we can get rid of the WARN, while leaving it in place for other
scenarios.
To address the above points, add a new RPS interrupts_enabled flag and
use this during RPS disabling to avoid requeuing the RPS work and
reenabling of the RPS interrupts. Since the interrupt disabling happens
now in intel_suspend_gt_powersave(), we will disable RPS interrupts
explicitly during suspend (and not just through the first level mask),
but there is no problem doing so, it's also more consistent and allows
us to unify more of the RPS disabling during suspend and unload time in
the next patch.
v2/v3:
- rebase on patch "drm/i915: move rps irq disable one level up" in the
patchset
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-11-19 20:30:04 +07:00
|
|
|
bool interrupts_enabled;
|
2012-11-03 01:55:03 +07:00
|
|
|
u32 pm_iir;
|
2013-07-05 04:35:28 +07:00
|
|
|
|
2014-03-20 08:31:11 +07:00
|
|
|
/* Frequencies are stored in potentially platform dependent multiples.
|
|
|
|
* In other words, *_freq needs to be multiplied by X to be interesting.
|
|
|
|
* Soft limits are those which are used for the dynamic reclocking done
|
|
|
|
* by the driver (raise frequencies under heavy loads, and lower for
|
|
|
|
* lighter loads). Hard limits are those imposed by the hardware.
|
|
|
|
*
|
|
|
|
* A distinction is made for overclocking, which is never enabled by
|
|
|
|
* default, and is considered to be above the hard limit if it's
|
|
|
|
* possible at all.
|
|
|
|
*/
|
|
|
|
u8 cur_freq; /* Current frequency (cached, may not == HW) */
|
|
|
|
u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */
|
|
|
|
u8 max_freq_softlimit; /* Max frequency permitted by the driver */
|
|
|
|
u8 max_freq; /* Maximum frequency, RP0 if not overclocking */
|
|
|
|
u8 min_freq; /* AKA RPn. Minimum frequency */
|
|
|
|
u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */
|
|
|
|
u8 rp1_freq; /* "less than" RP0 power/frequency */
|
|
|
|
u8 rp0_freq; /* Non-overclocked max frequency. */
|
2014-07-10 14:46:24 +07:00
|
|
|
u32 cz_freq;
|
2012-11-03 01:14:00 +07:00
|
|
|
|
2014-07-04 04:33:01 +07:00
|
|
|
u32 ei_interrupt_count;
|
2012-11-03 01:14:00 +07:00
|
|
|
|
drm/i915: Tweak RPS thresholds to more aggressively downclock
After applying wait-boost we often find ourselves stuck at higher clocks
than required. The current threshold value requires the GPU to be
continuously and completely idle for 313ms before it is dropped by one
bin. Conversely, we require the GPU to be busy for an average of 90% over
an 84ms period before we upclock. So the current thresholds almost never
downclock the GPU, and respond very slowly to sudden demands for more
power. It is easy to observe that we currently lock into the wrong bin
and both underperform in benchmarks and consume more power than optimal
(just by repeating the task and measuring the different results).
An alternative approach, as discussed in the bspec, is to use a
continuous threshold for upclocking, and an average value for downclocking.
This is good for quickly detecting and reacting to state changes within a
frame, however it fails with the common throttling method of waiting
upon the outstanding frame - at least it is difficult to choose a
threshold that works well at 15,000fps and at 60fps. So continue to use
average busy/idle loads to determine frequency change.
v2: Use 3 power zones to keep frequencies low in steady-state mostly
idle (e.g. scrolling, interactive 2D drawing), and frequencies high
for demanding games. In between those end-states, we use a
fast-reclocking algorithm to converge more quickly on the desired bin.
v3: Bug fixes - make sure we reset adj after switching power zones.
v4: Tune - drop the continuous busy thresholds as it prevents us from
choosing the right frequency for glxgears style swap benchmarks. Instead
the goal is to be able to find the right clocks irrespective of the
wait-boost.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: Owen Taylor <otaylor@redhat.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-25 23:34:57 +07:00
|
|
|
int last_adj;
|
|
|
|
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
|
|
|
|
|
2013-10-11 03:58:50 +07:00
|
|
|
bool enabled;
|
2012-11-03 01:14:00 +07:00
|
|
|
struct delayed_work delayed_resume_work;
|
2012-11-03 01:14:01 +07:00
|
|
|
|
2014-07-11 02:31:18 +07:00
|
|
|
/* manual wa residency calculations */
|
|
|
|
struct intel_rps_ei up_ei, down_ei;
|
|
|
|
|
2012-11-03 01:14:01 +07:00
|
|
|
/*
|
|
|
|
* Protects RPS/RC6 register access and PCU communication.
|
|
|
|
* Must be taken after struct_mutex if nested.
|
|
|
|
*/
|
|
|
|
struct mutex hw_lock;
|
2012-11-03 01:55:03 +07:00
|
|
|
};
|
|
|
|
|
2012-11-30 04:18:51 +07:00
|
|
|
/* defined in intel_pm.c */
|
|
|
|
extern spinlock_t mchdev_lock;
|
|
|
|
|
2012-11-03 01:55:03 +07:00
|
|
|
struct intel_ilk_power_mgmt {
|
|
|
|
u8 cur_delay;
|
|
|
|
u8 min_delay;
|
|
|
|
u8 max_delay;
|
|
|
|
u8 fmax;
|
|
|
|
u8 fstart;
|
|
|
|
|
|
|
|
u64 last_count1;
|
|
|
|
unsigned long last_time1;
|
|
|
|
unsigned long chipset_power;
|
|
|
|
u64 last_count2;
|
2014-07-17 04:05:06 +07:00
|
|
|
u64 last_time2;
|
2012-11-03 01:55:03 +07:00
|
|
|
unsigned long gfx_power;
|
|
|
|
u8 corr;
|
|
|
|
|
|
|
|
int c_m;
|
|
|
|
int r_t;
|
2012-11-03 01:55:04 +07:00
|
|
|
|
|
|
|
struct drm_i915_gem_object *pwrctx;
|
|
|
|
struct drm_i915_gem_object *renderctx;
|
2012-11-03 01:55:03 +07:00
|
|
|
};
|
|
|
|
|
2014-03-05 00:22:55 +07:00
|
|
|
struct drm_i915_private;
|
|
|
|
struct i915_power_well;
|
|
|
|
|
|
|
|
struct i915_power_well_ops {
|
|
|
|
/*
|
|
|
|
* Synchronize the well's hw state to match the current sw state, for
|
|
|
|
* example enable/disable it based on the current refcount. Called
|
|
|
|
* during driver init and resume time, possibly after first calling
|
|
|
|
* the enable/disable handlers.
|
|
|
|
*/
|
|
|
|
void (*sync_hw)(struct drm_i915_private *dev_priv,
|
|
|
|
struct i915_power_well *power_well);
|
|
|
|
/*
|
|
|
|
* Enable the well and resources that depend on it (for example
|
|
|
|
* interrupts located on the well). Called after the 0->1 refcount
|
|
|
|
* transition.
|
|
|
|
*/
|
|
|
|
void (*enable)(struct drm_i915_private *dev_priv,
|
|
|
|
struct i915_power_well *power_well);
|
|
|
|
/*
|
|
|
|
* Disable the well and resources that depend on it. Called after
|
|
|
|
* the 1->0 refcount transition.
|
|
|
|
*/
|
|
|
|
void (*disable)(struct drm_i915_private *dev_priv,
|
|
|
|
struct i915_power_well *power_well);
|
|
|
|
/* Returns the hw enabled state. */
|
|
|
|
bool (*is_enabled)(struct drm_i915_private *dev_priv,
|
|
|
|
struct i915_power_well *power_well);
|
|
|
|
};
|
|
|
|
|
2013-05-30 21:07:11 +07:00
|
|
|
/* Power well structure for haswell */
|
|
|
|
struct i915_power_well {
|
2013-11-25 22:15:29 +07:00
|
|
|
const char *name;
|
2013-11-25 22:15:30 +07:00
|
|
|
bool always_on;
|
2013-05-30 21:07:11 +07:00
|
|
|
/* power well enable/disable usage count */
|
|
|
|
int count;
|
2014-06-06 00:31:47 +07:00
|
|
|
/* cached hw enabled state */
|
|
|
|
bool hw_enabled;
|
2013-11-25 22:15:29 +07:00
|
|
|
unsigned long domains;
|
2014-03-05 21:20:56 +07:00
|
|
|
unsigned long data;
|
2014-03-05 00:22:55 +07:00
|
|
|
const struct i915_power_well_ops *ops;
|
2013-05-30 21:07:11 +07:00
|
|
|
};
|
|
|
|
|
2013-10-25 21:36:47 +07:00
|
|
|
struct i915_power_domains {
|
2013-10-25 21:36:48 +07:00
|
|
|
/*
|
|
|
|
* Power wells needed for initialization at driver init and suspend
|
|
|
|
* time are on. They are kept on until after the first modeset.
|
|
|
|
*/
|
|
|
|
bool init_power_on;
|
2014-04-25 17:19:05 +07:00
|
|
|
bool initializing;
|
2013-11-25 22:15:29 +07:00
|
|
|
int power_well_count;
|
2013-10-25 21:36:48 +07:00
|
|
|
|
2013-10-25 21:36:47 +07:00
|
|
|
struct mutex lock;
|
2013-11-25 22:15:35 +07:00
|
|
|
int domain_use_count[POWER_DOMAIN_NUM];
|
2013-11-25 22:15:29 +07:00
|
|
|
struct i915_power_well *power_wells;
|
2013-10-25 21:36:47 +07:00
|
|
|
};
|
|
|
|
|
2013-09-20 01:13:41 +07:00
|
|
|
#define MAX_L3_SLICES 2
|
2012-11-03 01:55:07 +07:00
|
|
|
struct intel_l3_parity {
|
2013-09-20 01:13:41 +07:00
|
|
|
u32 *remap_info[MAX_L3_SLICES];
|
2012-11-03 01:55:07 +07:00
|
|
|
struct work_struct error_work;
|
2013-09-20 01:13:41 +07:00
|
|
|
int which_slice;
|
2012-11-03 01:55:07 +07:00
|
|
|
};
|
|
|
|
|
2012-11-14 23:14:03 +07:00
|
|
|
struct i915_gem_mm {
|
|
|
|
/** Memory allocator for GTT stolen memory */
|
|
|
|
struct drm_mm stolen;
|
|
|
|
/** List of all objects in gtt_space. Used to restore gtt
|
|
|
|
* mappings on resume */
|
|
|
|
struct list_head bound_list;
|
|
|
|
/**
|
|
|
|
* List of objects which are not bound to the GTT (thus
|
|
|
|
* are idle and not used by the GPU) but still have
|
|
|
|
* (presumably uncached) pages still attached.
|
|
|
|
*/
|
|
|
|
struct list_head unbound_list;
|
|
|
|
|
|
|
|
/** Usable portion of the GTT for GEM */
|
|
|
|
unsigned long stolen_base; /* limited to low memory (32-bit) */
|
|
|
|
|
|
|
|
/** PPGTT used for aliasing the PPGTT with the GTT */
|
|
|
|
struct i915_hw_ppgtt *aliasing_ppgtt;
|
|
|
|
|
2014-05-20 14:28:43 +07:00
|
|
|
struct notifier_block oom_notifier;
|
2014-03-25 20:23:04 +07:00
|
|
|
struct shrinker shrinker;
|
2012-11-14 23:14:03 +07:00
|
|
|
bool shrinker_no_lock_stealing;
|
|
|
|
|
|
|
|
/** LRU list of objects with fence regs on them. */
|
|
|
|
struct list_head fence_list;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* We leave the user IRQ off as much as possible,
|
|
|
|
* but this means that requests will finish and never
|
|
|
|
* be retired once the system goes idle. Set a timer to
|
|
|
|
* fire periodically while the ring is running. When it
|
|
|
|
* fires, go retire requests.
|
|
|
|
*/
|
|
|
|
struct delayed_work retire_work;
|
|
|
|
|
drm/i915: Boost RPS frequency for CPU stalls
If we encounter a situation where the CPU blocks waiting for results
from the GPU, give the GPU a kick to boost its frequency.
This should work to reduce user interface stalls and to quickly promote
mesa to high frequencies - but the cost is that our requested frequency
stalls high (as we do not idle for long enough before rc6 to start
reducing frequencies, nor are we aggressive at down clocking an
underused GPU). However, this should be mitigated by rc6 itself powering
off the GPU when idle, and that energy use is dependent upon the workload
of the GPU in addition to its frequency (e.g. the math or sampler
functions only consume power when used). Still, this is likely to
adversely affect light workloads.
In particular, this nearly eliminates the highly noticeable wake-up lag
in animations from idle. For example, expose or workspace transitions.
(However, given the situation where we fail to downclock, our requested
frequency is almost always the maximum, except for Baytrail where we
manually downclock upon idling. This often masks the latency of
upclocking after being idle, so animations are typically smooth - at the
cost of increased power consumption.)
Stéphane raised the concern that this will punish good applications and
reward bad applications - but due to the nature of how mesa performs its
client throttling, I believe all mesa applications will be roughly
equally affected. To address this concern, and to prevent applications
like compositors from permanently boosting the RPS state, we ratelimit the
frequency of the wait-boosts each client receives.
Unfortunately, this technique is ineffective with Ironlake - which also
has dynamic render power states and suffers just as dramatically. For
Ironlake, the thermal/power headroom is shared with the CPU through
Intelligent Power Sharing and the intel-ips module. This leaves us with
no GPU boost frequencies available when coming out of idle, and due to
hardware limitations we cannot change the arbitration between the CPU and
GPU quickly enough to be effective.
v2: Limit each client to receiving a single boost for each active period.
Tested by QA to only marginally increase power, and to demonstrably
increase throughput in games. No latency measurements yet.
v3: Cater for front-buffer rendering with manual throttling.
v4: Tidy up.
v5: Sadly the compositor needs frequent boosts as it may never idle, but
due to its picking mechanism (using ReadPixels) may require frequent
waits. Those waits, along with the waits for the vrefresh swap, conspire
to keep the GPU at low frequencies despite the interactive latency. To
overcome this we ditch the one-boost-per-active-period and just ratelimit
the number of wait-boosts each client can receive.
Reported-and-tested-by: Paul Neumann <paul104x@yahoo.de>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68716
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: Owen Taylor <otaylor@redhat.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[danvet: No extern for function prototypes in headers.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-25 23:34:56 +07:00
|
|
|
/**
|
|
|
|
* When we detect an idle GPU, we want to turn on
|
|
|
|
* powersaving features. So once we see that there
|
|
|
|
* are no more requests outstanding and no more
|
|
|
|
* arrive within a small period of time, we fire
|
|
|
|
* off the idle_work.
|
|
|
|
*/
|
|
|
|
struct delayed_work idle_work;
|
|
|
|
|
2012-11-14 23:14:03 +07:00
|
|
|
/**
|
|
|
|
* Are we in a non-interruptible section of code like
|
|
|
|
* modesetting?
|
|
|
|
*/
|
|
|
|
bool interruptible;
|
|
|
|
|
2014-02-22 00:55:39 +07:00
|
|
|
/**
|
|
|
|
* Is the GPU currently considered idle, or busy executing userspace
|
|
|
|
* requests? Whilst idle, we attempt to power down the hardware and
|
|
|
|
* display clocks. In order to reduce the effect on performance, there
|
|
|
|
* is a slight delay before we do so.
|
|
|
|
*/
|
|
|
|
bool busy;
|
|
|
|
|
2014-05-21 22:37:52 +07:00
|
|
|
/* the indicator for dispatch video commands on two BSD rings */
|
|
|
|
int bsd_ring_dispatch_index;
|
|
|
|
|
2012-11-14 23:14:03 +07:00
|
|
|
/** Bit 6 swizzling required for X tiling */
|
|
|
|
uint32_t bit_6_swizzle_x;
|
|
|
|
/** Bit 6 swizzling required for Y tiling */
|
|
|
|
uint32_t bit_6_swizzle_y;
|
|
|
|
|
|
|
|
/* accounting, useful for userland debugging */
|
2013-07-25 03:40:23 +07:00
|
|
|
spinlock_t object_stat_lock;
|
2012-11-14 23:14:03 +07:00
|
|
|
size_t object_memory;
|
|
|
|
u32 object_count;
|
|
|
|
};
|
|
|
|
|
2013-05-23 17:55:35 +07:00
|
|
|
struct drm_i915_error_state_buf {
|
2014-08-22 20:41:39 +07:00
|
|
|
struct drm_i915_private *i915;
|
2013-05-23 17:55:35 +07:00
|
|
|
unsigned bytes;
|
|
|
|
unsigned size;
|
|
|
|
int err;
|
|
|
|
u8 *buf;
|
|
|
|
loff_t start;
|
|
|
|
loff_t pos;
|
|
|
|
};
|
|
|
|
|
2013-06-06 19:18:39 +07:00
|
|
|
/*
 * Pairs a DRM device with one captured error state.
 * NOTE(review): the "file_priv" suffix suggests this is per-open-file
 * private data; confirm against the users of this structure.
 */
struct i915_error_state_file_priv {
	struct drm_device *dev;
	struct drm_i915_error_state *error;
};
|
|
|
|
|
2012-11-14 23:14:04 +07:00
|
|
|
struct i915_gpu_error {
|
|
|
|
/* For hangcheck timer */
|
|
|
|
#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
|
|
|
|
#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
|
2013-08-30 20:19:28 +07:00
|
|
|
/* Hang gpu twice in this window and your context gets banned */
|
|
|
|
#define DRM_I915_CTX_BAN_PERIOD DIV_ROUND_UP(8*DRM_I915_HANGCHECK_PERIOD, 1000)
|
|
|
|
|
2012-11-14 23:14:04 +07:00
|
|
|
struct timer_list hangcheck_timer;
|
|
|
|
|
|
|
|
/* For reset and error_state handling. */
|
|
|
|
spinlock_t lock;
|
|
|
|
/* Protected by the above dev->gpu_error.lock. */
|
|
|
|
struct drm_i915_error_state *first_error;
|
|
|
|
struct work_struct work;
|
|
|
|
|
2013-09-25 23:34:55 +07:00
|
|
|
|
|
|
|
unsigned long missed_irq_rings;
|
|
|
|
|
2012-11-15 23:17:22 +07:00
|
|
|
/**
|
2013-11-12 19:44:19 +07:00
|
|
|
* State variable controlling the reset flow and count
|
2012-11-15 23:17:22 +07:00
|
|
|
*
|
2013-11-12 19:44:19 +07:00
|
|
|
* This is a counter which gets incremented when reset is triggered,
|
|
|
|
* and again when reset has been handled. So odd values (lowest bit set)
|
|
|
|
* means that reset is in progress and even values that
|
|
|
|
* (reset_counter >> 1):th reset was successfully completed.
|
|
|
|
*
|
|
|
|
* If reset is not completed succesfully, the I915_WEDGE bit is
|
|
|
|
* set meaning that hardware is terminally sour and there is no
|
|
|
|
* recovery. All waiters on the reset_queue will be woken when
|
|
|
|
* that happens.
|
|
|
|
*
|
|
|
|
* This counter is used by the wait_seqno code to notice that reset
|
|
|
|
* event happened and it needs to restart the entire ioctl (since most
|
|
|
|
* likely the seqno it waited for won't ever signal anytime soon).
|
drm/i915: create a race-free reset detection
With the previous patch the state transition handling of the reset
code itself is now (hopefully) race free and solid. But that still
leaves out everyone else - with the various lock-free wait paths
we have there's the possibility that the reset happens between the
point where we read the seqno we should wait on and the actual wait.
And if __wait_seqno then never sees the RESET_IN_PROGRESS state, we'll
happily wait for a seqno which will in all likelyhood never signal.
In practice this is not a big problem since the X server gets
constantly interrupted, and can then submit more work (hopefully) to
unblock everyone else: As soon as a new seqno write lands, all waiters
will unblock. But running the i-g-t reset testcase ZZ_hangman can
expose this race, especially on slower hw with fewer cpu cores.
Now looking forward to ARB_robustness and friends that's not the best
possible behaviour, hence this patch adds a reset_counter to be able
to detect any reset, even if a given thread never observed the
in-progress state.
The important part is to correctly order things:
- The write side needs to increment the counter after any seqno gets
reset. Hence we need to do that at the end of the reset work, and
again wake everyone up. We also need to place a barrier in between
any possible seqno changes and the counter increment, since any
unlock operations only guarantee that nothing leaks out, but not
that at later load operation gets moved ahead.
- On the read side we need to ensure that no reset can sneak in and
invalidate the seqno. In all cases we can use the one-sided barrier
that unlock operations guarantee (of the lock protecting the
respective seqno/ring pair) to ensure correct ordering. Hence it is
sufficient to place the atomic read before the mutex/spin_unlock and
no additional barriers are required.
The end-result of all this is that we need to wake up everyone twice
in a reset operation:
- First, before the reset starts, to get any lockholders of the locks,
so that the reset can proceed.
- Second, after the reset is completed, to allow waiters to properly
and reliably detect the reset condition and bail out.
I admit that this entire reset_counter thing smells a bit like
overkill, but I think it's justified since it makes it really explicit
what the bail-out condition is. And we need a reset counter anyway to
implement ARB_robustness, and imo with finer-grained locking on the
horizont this is the most resilient scheme I could think of.
v2: Drop spurious change in the wait_for_error EXIT_COND - we only
need to wait until we leave the reset-in-progress wedged state.
v3: Don't play tricks with barriers in the throttle ioctl, the
spin_unlock is barrier enough.
I've also considered using a little helper to grab the current
reset_counter, but then decided that hiding the atomic_read isn't a
great idea, since having it explicitly show up in the code is a nice
remainder to reviews to check the memory barriers.
v4: Add a comment to explain why we need to fall through in
__wait_seqno in the end variable assignments.
v5: Review from Damien:
- s/smb/smp/ in a comment
- don't increment the reset counter after we've set it to WEDGED. Now
we (again) properly wedge the gpu when the reset fails.
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-12-06 15:01:42 +07:00
|
|
|
*
|
|
|
|
* This is important for lock-free wait paths, where no contended lock
|
|
|
|
* naturally enforces the correct ordering between the bail-out of the
|
|
|
|
* waiter and the gpu reset work code.
|
2012-11-15 23:17:22 +07:00
|
|
|
*/
|
|
|
|
atomic_t reset_counter;
|
|
|
|
|
|
|
|
#define I915_RESET_IN_PROGRESS_FLAG 1
|
2013-11-12 19:44:19 +07:00
|
|
|
#define I915_WEDGED (1 << 31)
|
2012-11-15 23:17:22 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Waitqueue to signal when the reset has completed. Used by clients
|
|
|
|
* that wait for dev_priv->mm.wedged to settle.
|
|
|
|
*/
|
|
|
|
wait_queue_head_t reset_queue;
|
2012-11-14 23:14:05 +07:00
|
|
|
|
2014-03-28 23:18:18 +07:00
|
|
|
/* Userspace knobs for gpu hang simulation;
|
|
|
|
* combines both a ring mask, and extra flags
|
|
|
|
*/
|
|
|
|
u32 stop_rings;
|
|
|
|
#define I915_STOP_RING_ALLOW_BAN (1 << 31)
|
|
|
|
#define I915_STOP_RING_ALLOW_WARN (1 << 30)
|
2013-09-25 23:34:55 +07:00
|
|
|
|
|
|
|
/* For missed irq/seqno simulation. */
|
|
|
|
unsigned int test_irq_rings;
|
2014-08-16 00:51:35 +07:00
|
|
|
|
|
|
|
/* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset */
|
|
|
|
bool reload_in_reset;
|
2012-11-14 23:14:04 +07:00
|
|
|
};
|
|
|
|
|
i915: ignore lid open event when resuming
i915 driver needs to do modeset when
1. system resumes from sleep
2. lid is opened
In PM_SUSPEND_MEM state, all the GPEs are cleared when system resumes,
thus it is the i915_resume code that does the modeset rather than intel_lid_notify().
But in PM_SUSPEND_FREEZE state, this will be broken because
system is still responsive to the lid events.
1. When we close the lid in Freeze state, intel_lid_notify() sets modeset_on_lid.
2. When we reopen the lid, intel_lid_notify() will do a modeset,
before the system is resumed.
here is the error log,
[92146.548074] WARNING: at drivers/gpu/drm/i915/intel_display.c:1028 intel_wait_for_pipe_off+0x184/0x190 [i915]()
[92146.548076] Hardware name: VGN-Z540N
[92146.548078] pipe_off wait timed out
[92146.548167] Modules linked in: hid_generic usbhid hid snd_hda_codec_realtek snd_hda_intel snd_hda_codec parport_pc snd_hwdep ppdev snd_pcm_oss i915 snd_mixer_oss snd_pcm arc4 iwldvm snd_seq_dummy mac80211 snd_seq_oss snd_seq_midi fbcon tileblit font bitblit softcursor drm_kms_helper snd_rawmidi snd_seq_midi_event coretemp drm snd_seq kvm btusb bluetooth snd_timer iwlwifi pcmcia tpm_infineon i2c_algo_bit joydev snd_seq_device intel_agp cfg80211 snd intel_gtt yenta_socket pcmcia_rsrc sony_laptop agpgart microcode psmouse tpm_tis serio_raw mxm_wmi soundcore snd_page_alloc tpm acpi_cpufreq lpc_ich pcmcia_core tpm_bios mperf processor lp parport firewire_ohci firewire_core crc_itu_t sdhci_pci sdhci thermal e1000e
[92146.548173] Pid: 4304, comm: kworker/0:0 Tainted: G W 3.8.0-rc3-s0i3-v3-test+ #9
[92146.548175] Call Trace:
[92146.548189] [<c10378e2>] warn_slowpath_common+0x72/0xa0
[92146.548227] [<f86398b4>] ? intel_wait_for_pipe_off+0x184/0x190 [i915]
[92146.548263] [<f86398b4>] ? intel_wait_for_pipe_off+0x184/0x190 [i915]
[92146.548270] [<c10379b3>] warn_slowpath_fmt+0x33/0x40
[92146.548307] [<f86398b4>] intel_wait_for_pipe_off+0x184/0x190 [i915]
[92146.548344] [<f86399c2>] intel_disable_pipe+0x102/0x190 [i915]
[92146.548380] [<f8639ea4>] ? intel_disable_plane+0x64/0x80 [i915]
[92146.548417] [<f8639f7c>] i9xx_crtc_disable+0xbc/0x150 [i915]
[92146.548456] [<f863ebee>] intel_crtc_update_dpms+0x5e/0x90 [i915]
[92146.548493] [<f86437cf>] intel_modeset_setup_hw_state+0x42f/0x8f0 [i915]
[92146.548535] [<f8645b0b>] intel_lid_notify+0x9b/0xc0 [i915]
[92146.548543] [<c15610d3>] notifier_call_chain+0x43/0x60
[92146.548550] [<c105d1e1>] __blocking_notifier_call_chain+0x41/0x80
[92146.548556] [<c105d23f>] blocking_notifier_call_chain+0x1f/0x30
[92146.548563] [<c131a684>] acpi_lid_send_state+0x78/0xa4
[92146.548569] [<c131aa9e>] acpi_button_notify+0x3b/0xf1
[92146.548577] [<c12df56a>] ? acpi_os_execute+0x17/0x19
[92146.548582] [<c12e591a>] ? acpi_ec_sync_query+0xa5/0xbc
[92146.548589] [<c12e2b82>] acpi_device_notify+0x16/0x18
[92146.548595] [<c12f4904>] acpi_ev_notify_dispatch+0x38/0x4f
[92146.548600] [<c12df0e8>] acpi_os_execute_deferred+0x20/0x2b
[92146.548607] [<c1051208>] process_one_work+0x128/0x3f0
[92146.548613] [<c1564f73>] ? common_interrupt+0x33/0x38
[92146.548618] [<c104f8c0>] ? wake_up_worker+0x30/0x30
[92146.548624] [<c12df0c8>] ? acpi_os_wait_events_complete+0x1e/0x1e
[92146.548629] [<c10524f9>] worker_thread+0x119/0x3b0
[92146.548634] [<c10523e0>] ? manage_workers+0x240/0x240
[92146.548640] [<c1056e84>] kthread+0x94/0xa0
[92146.548647] [<c1060000>] ? ftrace_raw_output_sched_stat_runtime+0x70/0xf0
[92146.548652] [<c15649b7>] ret_from_kernel_thread+0x1b/0x28
[92146.548658] [<c1056df0>] ? kthread_create_on_node+0xc0/0xc0
three different modeset flags are introduced in this patch
MODESET_ON_LID_OPEN: do modeset on next lid open event
MODESET_DONE: modeset already done
MODESET_SUSPENDED: suspended, only do modeset when system is resumed
In this way,
1. when lid is closed, MODESET_ON_LID_OPEN is set so that
we'll do modeset on next lid open event.
2. when lid is opened, MODESET_DONE is set
so that duplicate lid open events will be ignored.
3. when system suspends, MODESET_SUSPENDED is set.
In this case, we will not do modeset on any lid events.
Plus, locking mechanism is also introduced to avoid racing.
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-02-05 14:41:53 +07:00
|
|
|
/*
 * Modeset-restore state used to arbitrate between lid events and
 * suspend/resume.
 */
enum modeset_restore {
	MODESET_ON_LID_OPEN,	/* do a modeset on the next lid open event */
	MODESET_DONE,		/* modeset already done; ignore duplicate lid opens */
	MODESET_SUSPENDED,	/* suspended: only modeset when the system resumes */
};
|
|
|
|
|
2013-09-13 03:06:24 +07:00
|
|
|
/* Per-DDI-port capabilities and settings taken from the VBT. */
struct ddi_vbt_port_info {
	/*
	 * This is an index in the HDMI/DVI DDI buffer translation table.
	 * The special value HDMI_LEVEL_SHIFT_UNKNOWN means the VBT didn't
	 * populate this field.
	 */
#define HDMI_LEVEL_SHIFT_UNKNOWN	0xff
	uint8_t hdmi_level_shift;

	/* Output types the port supports, one flag bit each. */
	uint8_t supports_dvi:1;
	uint8_t supports_hdmi:1;
	uint8_t supports_dp:1;
};
|
|
|
|
|
2014-03-28 11:44:57 +07:00
|
|
|
/*
 * Level of DRRS support; the numeric values are spelled out explicitly and
 * are kept exactly as they were.
 */
enum drrs_support_type {
	DRRS_NOT_SUPPORTED = 0,
	STATIC_DRRS_SUPPORT = 1,
	SEAMLESS_DRRS_SUPPORT = 2
};
|
|
|
|
|
2014-11-14 23:52:30 +07:00
|
|
|
/*
 * Encoded "lines to wait" setting for PSR. The enumerators are consecutive
 * codes (0..3), not the literal line counts their names mention.
 */
enum psr_lines_to_wait {
	PSR_0_LINES_TO_WAIT = 0,
	PSR_1_LINE_TO_WAIT,
	PSR_4_LINES_TO_WAIT,
	PSR_8_LINES_TO_WAIT
};
|
|
|
|
|
2013-05-10 06:03:18 +07:00
|
|
|
struct intel_vbt_data {
|
|
|
|
struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */
|
|
|
|
struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */
|
|
|
|
|
|
|
|
/* Feature bits */
|
|
|
|
unsigned int int_tv_support:1;
|
|
|
|
unsigned int lvds_dither:1;
|
|
|
|
unsigned int lvds_vbt:1;
|
|
|
|
unsigned int int_crt_support:1;
|
|
|
|
unsigned int lvds_use_ssc:1;
|
|
|
|
unsigned int display_clock_mode:1;
|
|
|
|
unsigned int fdi_rx_polarity_inverted:1;
|
2014-05-27 21:03:59 +07:00
|
|
|
unsigned int has_mipi:1;
|
2013-05-10 06:03:18 +07:00
|
|
|
int lvds_ssc_freq;
|
|
|
|
unsigned int bios_lvds_val; /* initial [PCH_]LVDS reg val in VBIOS */
|
|
|
|
|
2014-03-28 11:44:57 +07:00
|
|
|
enum drrs_support_type drrs_type;
|
|
|
|
|
2013-05-10 06:03:18 +07:00
|
|
|
/* eDP */
|
|
|
|
int edp_rate;
|
|
|
|
int edp_lanes;
|
|
|
|
int edp_preemphasis;
|
|
|
|
int edp_vswing;
|
|
|
|
bool edp_initialized;
|
|
|
|
bool edp_support;
|
|
|
|
int edp_bpp;
|
|
|
|
struct edp_power_seq edp_pps;
|
|
|
|
|
2014-11-14 23:52:30 +07:00
|
|
|
struct {
|
|
|
|
bool full_link;
|
|
|
|
bool require_aux_wakeup;
|
|
|
|
int idle_frames;
|
|
|
|
enum psr_lines_to_wait lines_to_wait;
|
|
|
|
int tp1_wakeup_time;
|
|
|
|
int tp2_tp3_wakeup_time;
|
|
|
|
} psr;
|
|
|
|
|
2013-12-15 05:38:29 +07:00
|
|
|
struct {
|
|
|
|
u16 pwm_freq_hz;
|
2014-04-09 15:22:06 +07:00
|
|
|
bool present;
|
2013-12-15 05:38:29 +07:00
|
|
|
bool active_low_pwm;
|
2014-06-24 22:27:39 +07:00
|
|
|
u8 min_brightness; /* min_brightness/255 of max */
|
2013-12-15 05:38:29 +07:00
|
|
|
} backlight;
|
|
|
|
|
2013-08-27 19:12:25 +07:00
|
|
|
/* MIPI DSI */
|
|
|
|
struct {
|
2014-05-27 21:03:59 +07:00
|
|
|
u16 port;
|
2013-08-27 19:12:25 +07:00
|
|
|
u16 panel_id;
|
2014-04-14 12:30:34 +07:00
|
|
|
struct mipi_config *config;
|
|
|
|
struct mipi_pps_data *pps;
|
|
|
|
u8 seq_version;
|
|
|
|
u32 size;
|
|
|
|
u8 *data;
|
|
|
|
u8 *sequence[MIPI_SEQ_MAX];
|
2013-08-27 19:12:25 +07:00
|
|
|
} dsi;
|
|
|
|
|
2013-05-10 06:03:18 +07:00
|
|
|
int crt_ddc_pin;
|
|
|
|
|
|
|
|
int child_dev_num;
|
2013-09-12 04:02:47 +07:00
|
|
|
union child_device_config *child_dev;
|
2013-09-13 03:06:24 +07:00
|
|
|
|
|
|
|
struct ddi_vbt_port_info ddi_port_info[I915_MAX_PORTS];
|
2013-05-10 06:03:18 +07:00
|
|
|
};
|
|
|
|
|
2013-08-07 02:24:04 +07:00
|
|
|
/* Available DDB partitioning schemes. */
enum intel_ddb_partitioning {
	INTEL_DDB_PART_1_2,
	INTEL_DDB_PART_5_6, /* IVB+ */
};
|
|
|
|
|
2013-08-07 02:24:05 +07:00
|
|
|
/*
 * One watermark level: an enable flag plus four values.
 * NOTE(review): the pri/spr/cur/fbc prefixes presumably stand for primary
 * plane, sprite, cursor and FBC; confirm against the watermark code.
 */
struct intel_wm_level {
	bool enable;
	uint32_t pri_val;
	uint32_t spr_val;
	uint32_t cur_val;
	uint32_t fbc_val;
};
|
|
|
|
|
2013-12-17 19:46:36 +07:00
|
|
|
struct ilk_wm_values {
|
2013-10-09 23:18:03 +07:00
|
|
|
uint32_t wm_pipe[3];
|
|
|
|
uint32_t wm_lp[3];
|
|
|
|
uint32_t wm_lp_spr[3];
|
|
|
|
uint32_t wm_linetime[3];
|
|
|
|
bool enable_fbc_wm;
|
|
|
|
enum intel_ddb_partitioning partitioning;
|
|
|
|
};
|
|
|
|
|
2014-11-05 00:06:41 +07:00
|
|
|
/* A half-open range [start, end) of DDB blocks. */
struct skl_ddb_entry {
	uint16_t start, end;	/* in number of blocks, 'end' is exclusive */
};

/*
 * Number of blocks covered by @entry. Because 'end' is exclusive this is
 * simply end - start; an all-zero entry has size 0.
 */
static inline uint16_t skl_ddb_entry_size(const struct skl_ddb_entry *entry)
{
	return entry->end - entry->start;
}

/* True when @e1 and @e2 describe exactly the same block range. */
static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1,
				       const struct skl_ddb_entry *e2)
{
	return e1->start == e2->start && e1->end == e2->end;
}
|
|
|
|
|
2014-11-05 00:06:41 +07:00
|
|
|
struct skl_ddb_allocation {
|
2014-11-05 00:07:01 +07:00
|
|
|
struct skl_ddb_entry pipe[I915_MAX_PIPES];
|
2014-11-05 00:06:41 +07:00
|
|
|
struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES];
|
|
|
|
struct skl_ddb_entry cursor[I915_MAX_PIPES];
|
|
|
|
};
|
|
|
|
|
2014-11-05 00:06:40 +07:00
|
|
|
struct skl_wm_values {
|
|
|
|
bool dirty[I915_MAX_PIPES];
|
2014-11-05 00:06:41 +07:00
|
|
|
struct skl_ddb_allocation ddb;
|
2014-11-05 00:06:40 +07:00
|
|
|
uint32_t wm_linetime[I915_MAX_PIPES];
|
|
|
|
uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
|
|
|
|
uint32_t cursor[I915_MAX_PIPES][8];
|
|
|
|
uint32_t plane_trans[I915_MAX_PIPES][I915_MAX_PLANES];
|
|
|
|
uint32_t cursor_trans[I915_MAX_PIPES];
|
|
|
|
};
|
|
|
|
|
|
|
|
struct skl_wm_level {
|
|
|
|
bool plane_en[I915_MAX_PLANES];
|
2014-11-05 00:06:56 +07:00
|
|
|
bool cursor_en;
|
2014-11-05 00:06:40 +07:00
|
|
|
uint16_t plane_res_b[I915_MAX_PLANES];
|
|
|
|
uint8_t plane_res_l[I915_MAX_PLANES];
|
|
|
|
uint16_t cursor_res_b;
|
|
|
|
uint8_t cursor_res_l;
|
|
|
|
};
|
|
|
|
|
2013-08-19 23:18:09 +07:00
|
|
|
/*
|
2014-03-08 06:08:18 +07:00
|
|
|
* This struct helps tracking the state needed for runtime PM, which puts the
|
|
|
|
* device in PCI D3 state. Notice that when this happens, nothing on the
|
|
|
|
* graphics device works, even register access, so we don't get interrupts nor
|
|
|
|
* anything else.
|
2013-08-19 23:18:09 +07:00
|
|
|
*
|
2014-03-08 06:08:18 +07:00
|
|
|
* Every piece of our code that needs to actually touch the hardware needs to
|
|
|
|
* either call intel_runtime_pm_get or call intel_display_power_get with the
|
|
|
|
* appropriate power domain.
|
drm/i915: make PC8 be part of runtime PM suspend/resume
Currently, when our driver becomes idle for i915.pc8_timeout (default:
5s) we enable PC8, so we save some power, but not everything we can.
Then, while PC8 is enabled, if we stay idle for more
autosuspend_delay_ms (default: 10s) we'll enter runtime PM and put the
graphics device in D3 state, saving even more power. The two features
are separate things with increasing levels of power savings, but if we
disable PC8 we'll never get into D3.
While from the modularity point of view it would be nice to keep these
features as separate, we have reasons to merge them:
- We are not aware of anybody wanting a "PC8 without D3" environment.
- If we keep both features as separate, we'll have to test both
PC8 and PC8+D3 code paths. We're already having a major pain to
make QA do automated testing of just one thing, testing both paths
will cost even more.
- Only Haswell+ supports PC8, so if we want to add runtime PM support
to, for example, IVB, we'll have to copy some code from the PC8
feature to runtime PM, so merging both features as a single thing
will make it easier for enabling runtime PM on other platforms.
This patch only does the very basic steps required to have PC8 and
runtime PM merged on a single feature: the next patches will take care
of cleaning up everything.
v2: - Rebase.
v3: - Rebase.
- Fully remove the deprecated i915 params since Daniel doesn't
consider them as part of the ABI.
v4: - Rebase.
- Fix typo in the commit message.
v5: - Rebase, again.
- Add a huge comment explaining the different forcewake usage
(Chris, Daniel).
- Use open-coded forcewake functions (Daniel).
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-03-08 06:08:05 +07:00
|
|
|
*
|
2014-03-08 06:08:18 +07:00
|
|
|
* Our driver uses the autosuspend delay feature, which means we'll only really
|
|
|
|
* suspend if we stay with zero refcount for a certain amount of time. The
|
2014-09-30 15:56:39 +07:00
|
|
|
* default value is currently very conservative (see intel_runtime_pm_enable), but
|
2014-03-08 06:08:18 +07:00
|
|
|
* it can be changed with the standard runtime PM files from sysfs.
|
2013-08-19 23:18:09 +07:00
|
|
|
*
|
|
|
|
* The irqs_enabled variable becomes false exactly after we disable the IRQs and
|
|
|
|
* goes back to true exactly before we reenable the IRQs. We use this variable
|
|
|
|
* to check if someone is trying to enable/disable IRQs while they're supposed
|
|
|
|
* to be disabled. This shouldn't happen and we'll print some error messages in
|
2014-03-08 06:12:32 +07:00
|
|
|
* case it happens.
|
2013-08-19 23:18:09 +07:00
|
|
|
*
|
2014-03-08 06:08:18 +07:00
|
|
|
* For more, read the Documentation/power/runtime_pm.txt.
|
2013-08-19 23:18:09 +07:00
|
|
|
*/
|
2014-03-08 06:08:15 +07:00
|
|
|
struct i915_runtime_pm {
	bool suspended;
	bool irqs_enabled;
};
|
|
|
|
|
2013-10-16 18:30:34 +07:00
|
|
|
/*
 * Selectable sources for pipe CRC computation. INTEL_PIPE_CRC_SOURCE_MAX is
 * the last enumerator and therefore equals the number of real sources.
 */
enum intel_pipe_crc_source {
	INTEL_PIPE_CRC_SOURCE_NONE,
	INTEL_PIPE_CRC_SOURCE_PLANE1,
	INTEL_PIPE_CRC_SOURCE_PLANE2,
	INTEL_PIPE_CRC_SOURCE_PF,
	INTEL_PIPE_CRC_SOURCE_PIPE,
	/* TV/DP on pre-gen5/vlv can't use the pipe source. */
	INTEL_PIPE_CRC_SOURCE_TV,
	INTEL_PIPE_CRC_SOURCE_DP_B,
	INTEL_PIPE_CRC_SOURCE_DP_C,
	INTEL_PIPE_CRC_SOURCE_DP_D,
	INTEL_PIPE_CRC_SOURCE_AUTO,
	INTEL_PIPE_CRC_SOURCE_MAX,
};
|
|
|
|
|
2013-10-16 00:55:27 +07:00
|
|
|
/* One pipe CRC sample: a frame value and five CRC words. */
struct intel_pipe_crc_entry {
	uint32_t frame;
	uint32_t crc[5];
};
|
|
|
|
|
2013-10-16 00:55:29 +07:00
|
|
|
#define INTEL_PIPE_CRC_ENTRIES_NR 128
|
2013-10-16 00:55:27 +07:00
|
|
|
struct intel_pipe_crc {
|
2013-10-21 20:29:30 +07:00
|
|
|
spinlock_t lock;
|
|
|
|
bool opened; /* exclusive access to the result file */
|
2013-10-16 00:55:34 +07:00
|
|
|
struct intel_pipe_crc_entry *entries;
|
2013-10-16 18:30:34 +07:00
|
|
|
enum intel_pipe_crc_source source;
|
2013-10-21 20:29:30 +07:00
|
|
|
int head, tail;
|
2013-10-16 00:55:40 +07:00
|
|
|
wait_queue_head_t wq;
|
2013-10-16 00:55:27 +07:00
|
|
|
};
|
|
|
|
|
drm/i915: Track frontbuffer invalidation/flushing
So these are the guts of the new beast. This tracks when a frontbuffer
gets invalidated (due to frontbuffer rendering) and hence should be
constantly scanned out, and when it's flushed again and can be
compressed/one-shot-upload.
Rules for flushing are simple: The frontbuffer needs one more full
upload starting from the next vblank. Which means that the flushing
can _only_ be called once the frontbuffer update has been latched.
But this poses a problem for pageflips: We can't just delay the
flushing until the pageflip is latched, since that would pose the risk
that we override frontbuffer rendering that has been scheduled
in-between the pageflip ioctl and the actual latching.
To handle this track asynchronous invalidations (and also pageflip)
state per-ring and delay any in-between flushing until the rendering
has completed. And also cancel any delayed flushing if we get a new
invalidation request (whether delayed or not).
Also call intel_mark_fb_busy in both cases in all cases to make sure
that we keep the screen at the highest refresh rate both on flips,
synchronous plane updates and for frontbuffer rendering.
v2: Lots of improvements
Suggestions from Chris:
- Move invalidate/flush in flush_*_domain and set_to_*_domain.
- Drop the flush in busy_ioctl since it's redundant. Was a leftover
from an earlier concept to track flips/delayed flushes.
- Don't forget about the initial modeset enable/final disable.
Suggested by Chris.
Track flips accurately, too. Since flips complete independently of
rendering we need to track pending flips in a separate mask. Again if
an invalidate happens we need to cancel the eventual flush to avoid
races.
v3:
Provide correct header declarations for flip functions. Currently not
needed outside of intel_display.c, but part of the proper interface.
v4: Add proper domain management to fbcon so that the fbcon buffer is
also tracked correctly.
v5: Fixup locking around the fbcon set_to_gtt_domain call.
v6: More comments from Chris:
- Split out fbcon changes.
- Drop superfluous checks for potential scanout before calling intel_fb
functions - we can micro-optimize this later.
- s/intel_fb_/intel_fb_obj_/ to make it clear that this deals in gem
object. We already have precedence for fb_obj in the pin_and_fence
functions.
v7: Clarify the semantics of the flip flush handling by renaming
things a bit:
- Don't go through a gem object but take the relevant frontbuffer bits
directly. These functions center on the plane, the actual object is
irrelevant - even a flip to the same object as already active should
cause a flush.
- Add a new intel_frontbuffer_flip for synchronous plane updates. It
currently just calls intel_frontbuffer_flush since the implementation
differs.
This way we achieve a clear split between one-shot update events on
one side and frontbuffer rendering with potentially a very long delay
between the invalidate and flush.
Chris and I also had some discussions about mark_busy and whether it
is appropriate to call from flush. But mark busy is a state which
should be derived from the 3 events (invalidate, flush, flip) we now
have by the users, like psr does by tracking relevant information in
psr.busy_frontbuffer_bits. DRRS (the only real use of mark_busy for
frontbuffer) needs to have similar logic. With that the overall
mark_busy in the core could be removed.
v8: Only when retiring gpu buffers only flush frontbuffer bits we
actually invalidated in a batch. Just for safety since before any
additional usage/invalidate we should always retire current rendering.
Suggested by Chris Wilson.
v9: Actually use intel_frontbuffer_flip in all appropriate places.
Spotted by Chris.
v10: Address more comments from Chris:
- Don't call _flip in set_base when the crtc is inactive, avoids redundancy
in the modeset case with the initial enabling of all planes.
- Add comments explaining that the initial/final plane enable/disable
still has work left to do before it's fully generic.
v11: Only invalidate for gtt/cpu access when writing. Spotted by Chris.
v12: s/_flush/_flip/ in intel_overlay.c per Chris' comment.
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-06-19 21:01:59 +07:00
|
|
|
struct i915_frontbuffer_tracking {
|
|
|
|
struct mutex lock;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracking bits for delayed frontbuffer flushing du to gpu activity or
|
|
|
|
* scheduled flips.
|
|
|
|
*/
|
|
|
|
unsigned busy_bits;
|
|
|
|
unsigned flip_bits;
|
|
|
|
};
|
|
|
|
|
2014-10-07 21:21:26 +07:00
|
|
|
struct i915_wa_reg {
|
|
|
|
u32 addr;
|
|
|
|
u32 value;
|
|
|
|
/* bitmask representing WA bits */
|
|
|
|
u32 mask;
|
|
|
|
};
|
|
|
|
|
|
|
|
#define I915_MAX_WA_REGS 16
|
|
|
|
|
|
|
|
struct i915_workarounds {
|
|
|
|
struct i915_wa_reg reg[I915_MAX_WA_REGS];
|
|
|
|
u32 count;
|
|
|
|
};
|
|
|
|
|
2014-03-31 18:27:22 +07:00
|
|
|
struct drm_i915_private {
|
2012-11-03 01:55:02 +07:00
|
|
|
struct drm_device *dev;
|
2012-11-15 18:32:30 +07:00
|
|
|
struct kmem_cache *slab;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
2014-02-08 02:12:48 +07:00
|
|
|
const struct intel_device_info info;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
|
|
|
int relative_constants_mode;
|
|
|
|
|
|
|
|
void __iomem *regs;
|
|
|
|
|
2013-07-20 02:36:52 +07:00
|
|
|
struct intel_uncore uncore;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
|
|
|
struct intel_gmbus gmbus[GMBUS_NUM_PORTS];
|
|
|
|
|
2012-12-01 19:53:45 +07:00
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
/** gmbus_mutex protects against concurrent usage of the single hw gmbus
|
|
|
|
* controller on different i2c buses. */
|
|
|
|
struct mutex gmbus_mutex;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Base address of the gmbus and gpio block.
|
|
|
|
*/
|
|
|
|
uint32_t gpio_mmio_base;
|
|
|
|
|
2014-05-19 22:24:03 +07:00
|
|
|
/* MMIO base address for MIPI regs */
|
|
|
|
uint32_t mipi_mmio_base;
|
|
|
|
|
2012-12-01 19:53:45 +07:00
|
|
|
wait_queue_head_t gmbus_wait_queue;
|
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
struct pci_dev *bridge_dev;
|
2014-05-22 20:13:33 +07:00
|
|
|
struct intel_engine_cs ring[I915_NUM_RINGS];
|
2014-06-30 23:53:37 +07:00
|
|
|
struct drm_i915_gem_object *semaphore_obj;
|
2012-12-10 20:41:48 +07:00
|
|
|
uint32_t last_seqno, next_seqno;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
2014-09-11 12:43:25 +07:00
|
|
|
struct drm_dma_handle *status_page_dmah;
|
2012-11-03 01:55:02 +07:00
|
|
|
struct resource mch_res;
|
|
|
|
|
|
|
|
/* protects the irq masks */
|
|
|
|
spinlock_t irq_lock;
|
|
|
|
|
drm/i915: Replaced Blitter ring based flips with MMIO flips
This patch enables the framework for using MMIO based flip calls,
in contrast with the CS based flip calls which are being used currently.
MMIO based flip calls can be enabled on architectures where
Render and Blitter engines reside in different power wells. The
decision to use MMIO flips can be made based on workloads to give
100% residency for Media power well.
v2: The MMIO flips now use the interrupt driven mechanism for issuing the
flips when target seqno is reached. (Incorporating Ville's idea)
v3: Rebasing on latest code. Code restructuring after incorporating
Damien's comments
v4: Addressing Ville's review comments
-general cleanup
-updating only base addr instead of calling update_primary_plane
-extending patch for gen5+ platforms
v5: Addressed Ville's review comments
-Making mmio flip vs cs flip selection based on module parameter
-Adding check for DRIVER_MODESET feature in notify_ring before calling
notify mmio flip.
-Other changes mostly in function arguments
v6: -Having a separate function to check condition for using mmio flips (Ville)
-propagating error code from i915_gem_check_olr (Ville)
v7: -Adding __must_check with i915_gem_check_olr (Chris)
-Renaming mmio_flip_data to mmio_flip (Chris)
-Rebasing on latest nightly
v8: -Rebasing on latest code
-squash 3rd patch in series(mmio setbase vs page flip race) with this patch
-Added new tiling mode update in intel_do_mmio_flip (Chris)
v9: -check for obj->last_write_seqno being 0 instead of obj->ring being NULL in
intel_postpone_flip, as this is a more restrictive condition (Chris)
v10: -Applied Chris's suggestions for squashing patches 2,3 into this patch.
These patches make the selection of CS vs MMIO flip at the page flip time, and
make the module parameter for using mmio flips as tristate, the states being
'force CS flips', 'force mmio flips', 'driver discretion'.
Changed the logic for driver discretion (Chris)
v11: Minor code cleanup(better readability, fixing whitespace errors, using
lockdep to check mutex locked status in postpone_flip, removal of __must_check
in function definition) (Chris)
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Tested-by: Chris Wilson <chris@chris-wilson.co.uk> # snb, ivb
[danvet: Fix up parameter alignment checkpatch spotted.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-06-02 18:17:17 +07:00
|
|
|
/* protects the mmio flip data */
|
|
|
|
spinlock_t mmio_flip_lock;
|
|
|
|
|
2014-03-05 00:23:07 +07:00
|
|
|
bool display_irqs_enabled;
|
|
|
|
|
drm/i915: irq-drive the dp aux communication
At least on the platforms that have a dp aux irq and also have it
enabled - vlvhsw should have one, too. But I don't have a machine to
test this on. Judging from docs there's no dp aux interrupt for gm45.
Also, I only have an ivb cpu edp machine, so the dp aux A code for
snb/ilk is untested.
For dpcd probing when nothing is connected it slashes about 5ms of cpu
time (cpu time is now negligible), which agrees with 3 * 5 400 usec
timeouts.
A previous version of this patch increases the time required to go
through the dp_detect cycle (which includes reading the edid) from
around 33 ms to around 40 ms. Experiments indicated that this is
purely due to the irq latency - the hw doesn't allow us to queue up
dp aux transactions and hence irq latency directly affects throughput.
gmbus is much better, there we have a 8 byte buffer, and we get the
irq once another 4 bytes can be queued up.
But by using the pm_qos interface to request the lowest possible cpu
wake-up latency this slowdown completely disappeared.
Since all our output detection logic is single-threaded with the
mode_config mutex right now anyway, I've decided not to play fancy and
to just reuse the gmbus wait queue. But this would definitely prep the
way to run dp detection on different ports in parallel
v2: Add a timeout for dp aux transfers when using interrupts - the hw
_does_ prevent this with the hw-based 400 usec timeout, but if the
irq somehow doesn't arrive we're screwed. Lesson learned while
developing this ;-)
v3: While at it also convert the busy-loop to wait_for_atomic, so that
we don't run the risk of an infinite loop any more.
v4: Ensure we have the smallest possible irq latency by using the
pm_qos interface.
v5: Add a comment to the code to explain why we frob pm_qos. Suggested
by Chris Wilson.
v6: Disable dp irq for vlv, that's easier than trying to get at docs
and hw.
v7: Squash in a fix for Haswell that Paulo Zanoni tracked down - the
dp aux registers aren't at a fixed offset any more, but can be on the
PCH while the DP port is on the cpu die.
Reviewed-by: Imre Deak <imre.deak@intel.com> (v6)
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-12-01 19:53:48 +07:00
|
|
|
/* To control wakeup latency, e.g. for irq-driven dp aux transfers. */
|
|
|
|
struct pm_qos_request pm_qos;
|
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
/* DPIO indirect register protection */
|
2012-12-12 20:06:44 +07:00
|
|
|
struct mutex dpio_lock;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
|
|
|
/** Cached value of IMR to avoid reads in updating the bitfield */
|
drm/i915/bdw: Implement interrupt changes
The interrupt handling implementation remains the same as previous
generations with the 4 types of registers, status, identity, mask, and
enable. However the layout of where the bits go have changed entirely.
To address these changes, all of the interrupt vfuncs needed special
gen8 code.
The way it works is there is a top level status register now which
informs the interrupt service routine which unit caused the interrupt,
and therefore which interrupt registers to read to process the
interrupt. For display the division is quite logical, a set of interrupt
registers for each pipe, and in addition to those, a set each for "misc"
and port.
For GT the things get a bit hairy, as seen by the code. Each of the GT
units has its own bits defined. They all look *very similar* and
reside in 16 bits of a GT register. As an example, RCS and BCS share
register 0. To compact the code a bit, at a slight expense to
complexity, this is exactly how the code works as well. 2 structures are
added to the ring buffer so that our ring buffer interrupt handling code
knows which ring shares the interrupt registers, and a shift value (ie.
the top or bottom 16 bits of the register).
The above allows us to keep the interrupt register caching scheme, the
per interrupt enables, and the code to mask and unmask interrupts
relatively clean (again at the cost of some more complexity).
Most of the GT units mentioned above are command streamers, and so the
symmetry should work quite well for even the yet to be implemented rings
which Broadwell adds.
v2: Fixes up a couple of bugs, and is more verbose about errors in the
Broadwell interrupt handler.
v3: fix DE_MISC IER offset
v4: Simplify interrupts:
I totally misread the docs the first time I implemented interrupts, and
so this should greatly simplify the mess. Unlike GEN6, we never touch
the regular mask registers in irq_get/put.
v5: Rebased on top of recent pch hotplug setup changes.
v6: Fixup on top of moving num_pipes to intel_info.
v7: Rebased on top of Egbert Eich's hpd irq handling rework. Also
wired up ibx_hpd_irq_setup for gen8.
v8: Rebase on top of Jani's asle handling rework.
v9: Rebase on top of Ben's VECS enabling for Haswell, where he
unfortunately went OCD on the gt irq #defines. Not that they're still
not yet fully consistent:
- Used the GT_RENDER_ #defines + bdw shifts.
- Dropped the shift from the L3_PARITY stuff, seemed clearer.
- s/irq_refcount/irq_refcount.gt/
v10: Squash in VECS enabling patches and the gen8_gt_irq_handler
refactoring from Zhao Yakui <yakui.zhao@intel.com>
v11: Rebase on top of the interrupt cleanups in upstream.
v12: Rebase on top of Ben's DPF changes in upstream.
v13: Drop bdw from the HAS_L3_DPF feature flag for now, it's unclear what
exactly needs to be done. Requested by Ben.
v14: Fix the patch.
- Drop the mask of reserved bits and assorted logic, it doesn't match
the spec.
- Do the posting read unconditionally instead of commenting it out.
- Add a GEN8_MASTER_IRQ_CONTROL definition and use it.
- Fix up the GEN8_PIPE interrupt defines and give the GEN8_ prefixes -
we actually will need to use them.
- Enclose macros in do {} while (0) (checkpatch).
- Clear DE_MISC interrupt bits only after having processed them.
- Fix whitespace fail (checkpatch).
- Fix overtly long lines where appropriate (checkpatch).
- Don't use typedef'ed private_t (maintainer-scripts).
- Align the function parameter list correctly.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v4)
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
bikeshed
2013-11-03 11:07:09 +07:00
|
|
|
union {
|
|
|
|
u32 irq_mask;
|
|
|
|
u32 de_irq_mask[I915_MAX_PIPES];
|
|
|
|
};
|
2012-11-03 01:55:02 +07:00
|
|
|
u32 gt_irq_mask;
|
2013-08-07 04:57:15 +07:00
|
|
|
u32 pm_irq_mask;
|
2014-03-15 21:53:22 +07:00
|
|
|
u32 pm_rps_events;
|
2014-02-10 23:42:49 +07:00
|
|
|
u32 pipestat_irq_mask[I915_MAX_PIPES];
|
2012-11-03 01:55:02 +07:00
|
|
|
|
|
|
|
struct work_struct hotplug_work;
|
2013-04-16 18:36:54 +07:00
|
|
|
struct {
|
|
|
|
unsigned long hpd_last_jiffies;
|
|
|
|
int hpd_cnt;
|
|
|
|
enum {
|
|
|
|
HPD_ENABLED = 0,
|
|
|
|
HPD_DISABLED = 1,
|
|
|
|
HPD_MARK_DISABLED = 2
|
|
|
|
} hpd_mark;
|
|
|
|
} hpd_stats[HPD_NUM_PINS];
|
2013-04-11 20:57:57 +07:00
|
|
|
u32 hpd_event_bits;
|
2014-08-18 19:37:02 +07:00
|
|
|
struct delayed_work hotplug_reenable_work;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
2013-06-28 06:30:21 +07:00
|
|
|
struct i915_fbc fbc;
|
2014-04-05 13:43:28 +07:00
|
|
|
struct i915_drrs drrs;
|
2012-11-03 01:55:02 +07:00
|
|
|
struct intel_opregion opregion;
|
2013-05-10 06:03:18 +07:00
|
|
|
struct intel_vbt_data vbt;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
2014-10-10 02:57:43 +07:00
|
|
|
bool preserve_bios_swizzle;
|
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
/* overlay */
|
|
|
|
struct intel_overlay *overlay;
|
|
|
|
|
2013-11-08 21:48:54 +07:00
|
|
|
/* backlight registers and fields in struct intel_panel */
|
2014-09-15 19:35:09 +07:00
|
|
|
struct mutex backlight_lock;
|
2013-04-02 19:48:09 +07:00
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
/* LVDS info */
|
|
|
|
bool no_aux_handshake;
|
|
|
|
|
2014-09-04 18:53:14 +07:00
|
|
|
/* protects panel power sequencer state */
|
|
|
|
struct mutex pps_mutex;
|
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
|
|
|
|
int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */
|
|
|
|
int num_fence_regs; /* 8 on pre-965, 16 otherwise */
|
|
|
|
|
|
|
|
unsigned int fsb_freq, mem_freq, is_ddr3;
|
2014-03-27 22:45:10 +07:00
|
|
|
unsigned int vlv_cdclk_freq;
|
2014-10-07 21:41:22 +07:00
|
|
|
unsigned int hpll_freq;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
2013-09-02 21:22:25 +07:00
|
|
|
/**
|
|
|
|
* wq - Driver workqueue for GEM.
|
|
|
|
*
|
|
|
|
* NOTE: Work items scheduled here are not allowed to grab any modeset
|
|
|
|
* locks, for otherwise the flushing done in the pageflip code will
|
|
|
|
* result in deadlocks.
|
|
|
|
*/
|
2012-11-03 01:55:02 +07:00
|
|
|
struct workqueue_struct *wq;
|
|
|
|
|
|
|
|
/* Display functions */
|
|
|
|
struct drm_i915_display_funcs display;
|
|
|
|
|
|
|
|
/* PCH chipset type */
|
|
|
|
enum intel_pch pch_type;
|
2012-11-21 00:12:07 +07:00
|
|
|
unsigned short pch_id;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
|
|
|
unsigned long quirks;
|
|
|
|
|
i915: ignore lid open event when resuming
i915 driver needs to do modeset when
1. system resumes from sleep
2. lid is opened
In PM_SUSPEND_MEM state, all the GPEs are cleared when system resumes,
thus it is the i915_resume code that does the modeset rather than intel_lid_notify().
But in PM_SUSPEND_FREEZE state, this will be broken because
system is still responsive to the lid events.
1. When we close the lid in Freeze state, intel_lid_notify() sets modeset_on_lid.
2. When we reopen the lid, intel_lid_notify() will do a modeset,
before the system is resumed.
here is the error log,
[92146.548074] WARNING: at drivers/gpu/drm/i915/intel_display.c:1028 intel_wait_for_pipe_off+0x184/0x190 [i915]()
[92146.548076] Hardware name: VGN-Z540N
[92146.548078] pipe_off wait timed out
[92146.548167] Modules linked in: hid_generic usbhid hid snd_hda_codec_realtek snd_hda_intel snd_hda_codec parport_pc snd_hwdep ppdev snd_pcm_oss i915 snd_mixer_oss snd_pcm arc4 iwldvm snd_seq_dummy mac80211 snd_seq_oss snd_seq_midi fbcon tileblit font bitblit softcursor drm_kms_helper snd_rawmidi snd_seq_midi_event coretemp drm snd_seq kvm btusb bluetooth snd_timer iwlwifi pcmcia tpm_infineon i2c_algo_bit joydev snd_seq_device intel_agp cfg80211 snd intel_gtt yenta_socket pcmcia_rsrc sony_laptop agpgart microcode psmouse tpm_tis serio_raw mxm_wmi soundcore snd_page_alloc tpm acpi_cpufreq lpc_ich pcmcia_core tpm_bios mperf processor lp parport firewire_ohci firewire_core crc_itu_t sdhci_pci sdhci thermal e1000e
[92146.548173] Pid: 4304, comm: kworker/0:0 Tainted: G W 3.8.0-rc3-s0i3-v3-test+ #9
[92146.548175] Call Trace:
[92146.548189] [<c10378e2>] warn_slowpath_common+0x72/0xa0
[92146.548227] [<f86398b4>] ? intel_wait_for_pipe_off+0x184/0x190 [i915]
[92146.548263] [<f86398b4>] ? intel_wait_for_pipe_off+0x184/0x190 [i915]
[92146.548270] [<c10379b3>] warn_slowpath_fmt+0x33/0x40
[92146.548307] [<f86398b4>] intel_wait_for_pipe_off+0x184/0x190 [i915]
[92146.548344] [<f86399c2>] intel_disable_pipe+0x102/0x190 [i915]
[92146.548380] [<f8639ea4>] ? intel_disable_plane+0x64/0x80 [i915]
[92146.548417] [<f8639f7c>] i9xx_crtc_disable+0xbc/0x150 [i915]
[92146.548456] [<f863ebee>] intel_crtc_update_dpms+0x5e/0x90 [i915]
[92146.548493] [<f86437cf>] intel_modeset_setup_hw_state+0x42f/0x8f0 [i915]
[92146.548535] [<f8645b0b>] intel_lid_notify+0x9b/0xc0 [i915]
[92146.548543] [<c15610d3>] notifier_call_chain+0x43/0x60
[92146.548550] [<c105d1e1>] __blocking_notifier_call_chain+0x41/0x80
[92146.548556] [<c105d23f>] blocking_notifier_call_chain+0x1f/0x30
[92146.548563] [<c131a684>] acpi_lid_send_state+0x78/0xa4
[92146.548569] [<c131aa9e>] acpi_button_notify+0x3b/0xf1
[92146.548577] [<c12df56a>] ? acpi_os_execute+0x17/0x19
[92146.548582] [<c12e591a>] ? acpi_ec_sync_query+0xa5/0xbc
[92146.548589] [<c12e2b82>] acpi_device_notify+0x16/0x18
[92146.548595] [<c12f4904>] acpi_ev_notify_dispatch+0x38/0x4f
[92146.548600] [<c12df0e8>] acpi_os_execute_deferred+0x20/0x2b
[92146.548607] [<c1051208>] process_one_work+0x128/0x3f0
[92146.548613] [<c1564f73>] ? common_interrupt+0x33/0x38
[92146.548618] [<c104f8c0>] ? wake_up_worker+0x30/0x30
[92146.548624] [<c12df0c8>] ? acpi_os_wait_events_complete+0x1e/0x1e
[92146.548629] [<c10524f9>] worker_thread+0x119/0x3b0
[92146.548634] [<c10523e0>] ? manage_workers+0x240/0x240
[92146.548640] [<c1056e84>] kthread+0x94/0xa0
[92146.548647] [<c1060000>] ? ftrace_raw_output_sched_stat_runtime+0x70/0xf0
[92146.548652] [<c15649b7>] ret_from_kernel_thread+0x1b/0x28
[92146.548658] [<c1056df0>] ? kthread_create_on_node+0xc0/0xc0
three different modeset flags are introduced in this patch
MODESET_ON_LID_OPEN: do modeset on next lid open event
MODESET_DONE: modeset already done
MODESET_SUSPENDED: suspended, only do modeset when system is resumed
In this way,
1. when lid is closed, MODESET_ON_LID_OPEN is set so that
we'll do modeset on next lid open event.
2. when lid is opened, MODESET_DONE is set
so that duplicate lid open events will be ignored.
3. when system suspends, MODESET_SUSPENDED is set.
In this case, we will not do modeset on any lid events.
Plus, locking mechanism is also introduced to avoid racing.
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-02-05 14:41:53 +07:00
|
|
|
enum modeset_restore modeset_restore;
|
|
|
|
struct mutex modeset_restore_lock;
|
2008-07-31 02:06:12 +07:00
|
|
|
|
2013-07-17 06:50:07 +07:00
|
|
|
struct list_head vm_list; /* Global list of all address spaces */
|
drm/i915: Split out GTT specific header file
This file contains all necessary defines, prototypes and typedefs for
manipulating GEN graphics address translation (this does not include the
legacy AGP driver)
Reiterating the comment in the header,
"Please try to maintain the following order within this file unless it
makes sense to do otherwise. From top to bottom:
1. typedefs
2. #defines, and macros
3. structure definitions
4. function prototypes
Within each section, please try to order by generation in ascending
order, from top to bottom (ie. GEN6 on the top, GEN8 on the bottom)."
I've made some minor cleanups, and fixed a couple of typos while here -
but there should be no functional changes.
The purpose of the patch is to reduce clutter in our main header file,
making room for new growth, and make documentation of our interfaces
easier by splitting things out.
With a little more work, like making i915_gtt a pointer, we could
potentially completely isolate this header from i915_drv.h. At the
moment however, I don't think it's worth the effort.
Personally, I would have liked to put the PTE encoding functions in this
file too, but I didn't want to rock the boat too much.
A similar patch has been in use on my machine for some time. This exact
patch though has only been compile tested.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-03-23 12:47:21 +07:00
|
|
|
struct i915_gtt gtt; /* VM representing the global address space */
|
2013-01-18 03:45:15 +07:00
|
|
|
|
2012-11-14 23:14:03 +07:00
|
|
|
struct i915_gem_mm mm;
|
2014-08-07 20:20:40 +07:00
|
|
|
DECLARE_HASHTABLE(mm_structs, 7);
|
|
|
|
struct mutex mm_lock;
|
2012-05-02 16:49:32 +07:00
|
|
|
|
|
|
|
/* Kernel Modesetting */
|
|
|
|
|
2009-05-31 16:17:17 +07:00
|
|
|
struct sdvo_device_mapping sdvo_mappings[2];
|
2009-08-18 03:31:43 +07:00
|
|
|
|
2014-02-08 02:12:52 +07:00
|
|
|
struct drm_crtc *plane_to_crtc_mapping[I915_MAX_PIPES];
|
|
|
|
struct drm_crtc *pipe_to_crtc_mapping[I915_MAX_PIPES];
|
2009-11-18 23:25:18 +07:00
|
|
|
wait_queue_head_t pending_flip_queue;
|
|
|
|
|
2013-10-22 02:04:07 +07:00
|
|
|
#ifdef CONFIG_DEBUG_FS
|
|
|
|
struct intel_pipe_crc pipe_crc[I915_MAX_PIPES];
|
|
|
|
#endif
|
|
|
|
|
2013-06-05 18:34:06 +07:00
|
|
|
int num_shared_dpll;
|
|
|
|
struct intel_shared_dpll shared_dplls[I915_NUM_PLLS];
|
2013-11-06 13:36:35 +07:00
|
|
|
int dpio_phy_iosf_port[I915_NUM_PHYS_VLV];
|
2012-04-20 23:11:53 +07:00
|
|
|
|
2014-10-07 21:21:26 +07:00
|
|
|
struct i915_workarounds workarounds;
|
2014-08-26 20:44:51 +07:00
|
|
|
|
2009-08-18 03:31:43 +07:00
|
|
|
/* Reclocking support */
|
|
|
|
bool render_reclock_avail;
|
|
|
|
bool lvds_downclock_avail;
|
2009-11-20 10:24:16 +07:00
|
|
|
/* indicates the reduced downclock for LVDS*/
|
|
|
|
int lvds_downclock;
|
drm/i915: Track frontbuffer invalidation/flushing
So these are the guts of the new beast. This tracks when a frontbuffer
gets invalidated (due to frontbuffer rendering) and hence should be
constantly scanned out, and when it's flushed again and can be
compressed/one-shot-upload.
Rules for flushing are simple: The frontbuffer needs one more full
upload starting from the next vblank. Which means that the flushing
can _only_ be called once the frontbuffer update has been latched.
But this poses a problem for pageflips: We can't just delay the
flushing until the pageflip is latched, since that would pose the risk
that we override frontbuffer rendering that has been scheduled
in-between the pageflip ioctl and the actual latching.
To handle this track asynchronous invalidations (and also pageflip)
state per-ring and delay any in-between flushing until the rendering
has completed. And also cancel any delayed flushing if we get a new
invalidation request (whether delayed or not).
Also call intel_mark_fb_busy in both cases in all cases to make sure
that we keep the screen at the highest refresh rate both on flips,
synchronous plane updates and for frontbuffer rendering.
v2: Lots of improvements
Suggestions from Chris:
- Move invalidate/flush in flush_*_domain and set_to_*_domain.
- Drop the flush in busy_ioctl since it's redundant. Was a leftover
from an earlier concept to track flips/delayed flushes.
- Don't forget about the initial modeset enable/final disable.
Suggested by Chris.
Track flips accurately, too. Since flips complete independently of
rendering we need to track pending flips in a separate mask. Again if
an invalidate happens we need to cancel the eventual flush to avoid
races.
v3:
Provide correct header declarations for flip functions. Currently not
needed outside of intel_display.c, but part of the proper interface.
v4: Add proper domain management to fbcon so that the fbcon buffer is
also tracked correctly.
v5: Fixup locking around the fbcon set_to_gtt_domain call.
v6: More comments from Chris:
- Split out fbcon changes.
- Drop superfluous checks for potential scanout before calling intel_fb
functions - we can micro-optimize this later.
- s/intel_fb_/intel_fb_obj_/ to make it clear that this deals in gem
object. We already have precedence for fb_obj in the pin_and_fence
functions.
v7: Clarify the semantics of the flip flush handling by renaming
things a bit:
- Don't go through a gem object but take the relevant frontbuffer bits
directly. These functions center on the plane, the actual object is
irrelevant - even a flip to the same object as already active should
cause a flush.
- Add a new intel_frontbuffer_flip for synchronous plane updates. It
currently just calls intel_frontbuffer_flush since the implementation
differs.
This way we achieve a clear split between one-shot update events on
one side and frontbuffer rendering with potentially a very long delay
between the invalidate and flush.
Chris and I also had some discussions about mark_busy and whether it
is appropriate to call from flush. But mark busy is a state which
should be derived from the 3 events (invalidate, flush, flip) we now
have by the users, like psr does by tracking relevant information in
psr.busy_frontbuffer_bits. DRRS (the only real use of mark_busy for
frontbuffer) needs to have similar logic. With that the overall
mark_busy in the core could be removed.
v8: Only when retiring gpu buffers only flush frontbuffer bits we
actually invalidated in a batch. Just for safety since before any
additional usage/invalidate we should always retire current rendering.
Suggested by Chris Wilson.
v9: Actually use intel_frontbuffer_flip in all appropriate places.
Spotted by Chris.
v10: Address more comments from Chris:
- Don't call _flip in set_base when the crtc is inactive, avoids redundancy
in the modeset case with the initial enabling of all planes.
- Add comments explaining that the initial/final plane enable/disable
still has work left to do before it's fully generic.
v11: Only invalidate for gtt/cpu access when writing. Spotted by Chris.
v12: s/_flush/_flip/ in intel_overlay.c per Chris' comment.
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-06-19 21:01:59 +07:00
|
|
|
|
|
|
|
struct i915_frontbuffer_tracking fb_tracking;
|
|
|
|
|
2009-08-18 03:31:43 +07:00
|
|
|
u16 orig_clock;
|
2010-01-30 02:27:07 +07:00
|
|
|
|
2009-12-17 13:48:43 +07:00
|
|
|
bool mchbar_need_disable;
|
2010-01-30 02:27:07 +07:00
|
|
|
|
2012-11-03 01:55:07 +07:00
|
|
|
struct intel_l3_parity l3_parity;
|
|
|
|
|
2013-07-05 01:02:05 +07:00
|
|
|
/* Cannot be determined by PCIID. You must always read a register. */
|
|
|
|
size_t ellc_size;
|
|
|
|
|
2012-08-09 04:35:35 +07:00
|
|
|
/* gen6+ rps state */
|
2012-11-03 01:55:03 +07:00
|
|
|
struct intel_gen6_power_mgmt rps;
|
2012-08-09 04:35:35 +07:00
|
|
|
|
2012-08-09 04:35:39 +07:00
|
|
|
/* ilk-only ips/rps state. Everything in here is protected by the global
|
|
|
|
* mchdev_lock in intel_pm.c */
|
2012-11-03 01:55:03 +07:00
|
|
|
struct intel_ilk_power_mgmt ips;
|
2010-02-06 03:42:41 +07:00
|
|
|
|
2013-10-25 21:36:47 +07:00
|
|
|
struct i915_power_domains power_domains;
|
2013-05-30 21:07:11 +07:00
|
|
|
|
2013-10-04 02:15:06 +07:00
|
|
|
struct i915_psr psr;
|
2013-07-12 04:45:00 +07:00
|
|
|
|
2012-11-14 23:14:04 +07:00
|
|
|
struct i915_gpu_error gpu_error;
|
2010-10-01 20:57:56 +07:00
|
|
|
|
2013-05-09 00:45:13 +07:00
|
|
|
struct drm_i915_gem_object *vlv_pctx;
|
|
|
|
|
2013-10-09 14:18:51 +07:00
|
|
|
#ifdef CONFIG_DRM_I915_FBDEV
|
2010-03-30 12:34:14 +07:00
|
|
|
/* list of fbdev register on this device */
|
|
|
|
struct intel_fbdev *fbdev;
|
2014-08-13 19:09:46 +07:00
|
|
|
struct work_struct fbdev_suspend_work;
|
2013-10-09 14:18:51 +07:00
|
|
|
#endif
|
2011-02-22 05:23:52 +07:00
|
|
|
|
|
|
|
struct drm_property *broadcast_rgb_property;
|
2011-05-13 04:17:24 +07:00
|
|
|
struct drm_property *force_audio_property;
|
2012-05-26 06:56:22 +07:00
|
|
|
|
drm/i915: preliminary context support
Very basic code for context setup/destruction in the driver.
Adds the file i915_gem_context.c This file implements HW context
support. On gen5+ a HW context consists of an opaque GPU object which is
referenced at times of context saves and restores. With RC6 enabled,
the context is also referenced as the GPU enters and exits from RC6
(GPU has its own internal power context, except on gen5). Though
something like a context does exist for the media ring, the code only
supports contexts for the render ring.
In software, there is a distinction between contexts created by the
user, and the default HW context. The default HW context is used by GPU
clients that do not request setup of their own hardware context. The
default context's state is never restored to help prevent programming
errors. This would happen if a client ran and piggy-backed off another
clients GPU state. The default context only exists to give the GPU some
offset to load as the current to invoke a save of the context we
actually care about. In fact, the code could likely be constructed,
albeit in a more complicated fashion, to never use the default context,
though that limits the driver's ability to swap out, and/or destroy
other contexts.
All other contexts are created as a request by the GPU client. These
contexts store GPU state, and thus allow GPU clients to not re-emit
state (and potentially query certain state) at any time. The kernel
driver makes certain that the appropriate commands are inserted.
There are 4 entry points into the contexts, init, fini, open, close.
The names are self-explanatory except that init can be called during
reset, and also during pm thaw/resume. As we expect our context to be
preserved across these events, we do not reinitialize in this case.
As Adam Jackson pointed out, The cutoff of 1MB where a HW context is
considered too big is arbitrary. The reason for this is even though
context sizes are increasing with every generation, they have yet to
eclipse even 32k. If we somehow read back way more than that, it
probably means BIOS has done something strange, or we're running on a
platform that wasn't designed for this.
v2: rename load/unload to init/fini (daniel)
remove ILK support for get_size() (indirectly daniel)
add HAS_HW_CONTEXTS macro to clarify supported platforms (daniel)
added comments (Ben)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
2012-06-05 04:42:42 +07:00
|
|
|
uint32_t hw_context_size;
|
2013-09-18 11:12:45 +07:00
|
|
|
struct list_head context_list;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
2012-12-12 01:48:29 +07:00
|
|
|
u32 fdi_rx_config;
|
2012-12-01 21:04:26 +07:00
|
|
|
|
2014-03-10 16:01:44 +07:00
|
|
|
u32 suspend_count;
|
2012-11-03 01:55:02 +07:00
|
|
|
struct i915_suspend_saved_registers regfile;
|
2014-05-05 19:19:56 +07:00
|
|
|
struct vlv_s0ix_state vlv_s0ix_state;
|
2012-11-03 01:55:05 +07:00
|
|
|
|
2013-08-01 20:18:50 +07:00
|
|
|
struct {
|
|
|
|
/*
|
|
|
|
* Raw watermark latency values:
|
|
|
|
* in 0.1us units for WM0,
|
|
|
|
* in 0.5us units for WM1+.
|
|
|
|
*/
|
|
|
|
/* primary */
|
|
|
|
uint16_t pri_latency[5];
|
|
|
|
/* sprite */
|
|
|
|
uint16_t spr_latency[5];
|
|
|
|
/* cursor */
|
|
|
|
uint16_t cur_latency[5];
|
2014-11-05 00:06:38 +07:00
|
|
|
/*
|
|
|
|
* Raw watermark memory latency values
|
|
|
|
* for SKL for all 8 levels
|
|
|
|
* in 1us units.
|
|
|
|
*/
|
|
|
|
uint16_t skl_latency[8];
|
2013-10-09 23:18:03 +07:00
|
|
|
|
2014-11-05 00:06:42 +07:00
|
|
|
/*
|
|
|
|
* The skl_wm_values structure is a bit too big for stack
|
|
|
|
* allocation, so we keep the staging struct where we store
|
|
|
|
* intermediate results here instead.
|
|
|
|
*/
|
|
|
|
struct skl_wm_values skl_results;
|
|
|
|
|
2013-10-09 23:18:03 +07:00
|
|
|
/* current hardware state */
|
2014-11-05 00:06:42 +07:00
|
|
|
union {
|
|
|
|
struct ilk_wm_values hw;
|
|
|
|
struct skl_wm_values skl_hw;
|
|
|
|
};
|
2013-08-01 20:18:50 +07:00
|
|
|
} wm;
|
|
|
|
|
2013-12-07 05:32:13 +07:00
|
|
|
struct i915_runtime_pm pm;
|
|
|
|
|
2014-06-18 08:29:35 +07:00
|
|
|
struct intel_digital_port *hpd_irq_port[I915_MAX_PORTS];
|
|
|
|
u32 long_hpd_port_mask;
|
|
|
|
u32 short_hpd_port_mask;
|
|
|
|
struct work_struct dig_port_work;
|
|
|
|
|
2014-05-02 11:02:48 +07:00
|
|
|
/*
|
|
|
|
* if we get a HPD irq from DP and a HPD irq from non-DP
|
|
|
|
* the non-DP HPD could block the workqueue on a mode config
|
|
|
|
* mutex getting, that userspace may have taken. However
|
|
|
|
* userspace is waiting on the DP workqueue to run which is
|
|
|
|
* blocked behind the non-DP one.
|
|
|
|
*/
|
|
|
|
struct workqueue_struct *dp_wq;
|
|
|
|
|
2014-08-15 05:22:08 +07:00
|
|
|
uint32_t bios_vgacntr;
|
|
|
|
|
2014-07-24 23:04:21 +07:00
|
|
|
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
|
|
|
|
struct {
|
|
|
|
int (*do_execbuf)(struct drm_device *dev, struct drm_file *file,
|
|
|
|
struct intel_engine_cs *ring,
|
|
|
|
struct intel_context *ctx,
|
|
|
|
struct drm_i915_gem_execbuffer2 *args,
|
|
|
|
struct list_head *vmas,
|
|
|
|
struct drm_i915_gem_object *batch_obj,
|
|
|
|
u64 exec_start, u32 flags);
|
|
|
|
int (*init_rings)(struct drm_device *dev);
|
|
|
|
void (*cleanup_ring)(struct intel_engine_cs *ring);
|
|
|
|
void (*stop_ring)(struct intel_engine_cs *ring);
|
|
|
|
} gt;
|
|
|
|
|
2014-05-21 22:37:52 +07:00
|
|
|
/*
|
|
|
|
* NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
|
|
|
|
* will be rejected. Instead look for a better place.
|
|
|
|
*/
|
2014-03-31 18:27:22 +07:00
|
|
|
};
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2013-08-02 00:39:55 +07:00
|
|
|
/* Return the i915 driver-private structure stored in dev->dev_private. */
static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
|
|
|
|
{
|
|
|
|
return dev->dev_private;
|
|
|
|
}
|
|
|
|
|
2012-05-11 20:29:30 +07:00
|
|
|
/*
 * Iterate over initialised rings.
 *
 * Walks i__ from 0 up to (but not including) I915_NUM_RINGS, points ring__
 * at dev_priv__->ring[i__] and executes the loop body only for rings where
 * intel_ring_initialized() returns true.
 *
 * NOTE: the expansion ends in a bare `if`, so an `else` written directly
 * after the loop body would bind to that hidden `if`.
 */
|
|
|
|
#define for_each_ring(ring__, dev_priv__, i__) \
|
|
|
|
for ((i__) = 0; (i__) < I915_NUM_RINGS; (i__)++) \
|
|
|
|
if (((ring__) = &(dev_priv__)->ring[(i__)]), intel_ring_initialized((ring__)))
|
|
|
|
|
2012-02-14 10:45:36 +07:00
|
|
|
/*
 * HDMI audio override settings. Numbering starts at -2, so the enumerators
 * evaluate to OFF_DVI = -2, OFF = -1, AUTO = 0 and ON = 1.
 */
enum hdmi_force_audio {
|
|
|
|
HDMI_AUDIO_OFF_DVI = -2, /* no aux data for HDMI-DVI converter */
|
|
|
|
HDMI_AUDIO_OFF, /* force turn off HDMI audio */
|
|
|
|
HDMI_AUDIO_AUTO, /* trust EDID */
|
|
|
|
HDMI_AUDIO_ON, /* force turn on HDMI audio */
|
|
|
|
};
|
|
|
|
|
2013-07-04 18:06:28 +07:00
|
|
|
/*
 * -1 cast to u32. NOTE(review): presumably a sentinel meaning "no GTT
 * offset assigned" - confirm against the users of this macro.
 */
#define I915_GTT_OFFSET_NONE ((u32)-1)
|
2012-11-15 18:32:19 +07:00
|
|
|
|
2012-06-07 21:38:42 +07:00
|
|
|
struct drm_i915_gem_object_ops {
|
|
|
|
/* Interface between the GEM object and its backing storage.
|
|
|
|
* get_pages() is called once prior to the use of the associated set
|
|
|
|
* of pages before binding them into the GTT, and put_pages() is
|
|
|
|
* called after we no longer need them. As we expect there to be
|
|
|
|
* associated cost with migrating pages between the backing storage
|
|
|
|
* and making them available for the GPU (e.g. clflush), we may hold
|
|
|
|
* onto the pages after they are no longer referenced by the GPU
|
|
|
|
* in case they may be used again shortly (for example migrating the
|
|
|
|
* pages to a different memory domain within the GTT). put_pages()
|
|
|
|
* will therefore most likely be called when the object itself is
|
|
|
|
* being released or under memory pressure (where we attempt to
|
|
|
|
* reap pages for the shrinker).
|
|
|
|
*/
|
|
|
|
int (*get_pages)(struct drm_i915_gem_object *);
|
|
|
|
void (*put_pages)(struct drm_i915_gem_object *);
|
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
By exporting the ability to map user address and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forwarding porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifier
with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
rearrange error path to destroy the mmu_notifier locklessly.
Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
allocations of the same userptr range - and notice that
struct_mutex was presumed to be held when during creation it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the
hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required
infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to
be used to fix up the kernel thread's current->mm for use with
copy_user().
v22: Use sg_alloc_table_from_pages for that chunky feeling
v23: Export a function for sanity checking dma-buf rather than encode
userptr details elsewhere, and clean up comments based on
suggestions by Bradley.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com>
[danvet: Frob ioctl allocation to pick the next one - will cause a bit
of fuss with create2 apparently, but such are the rules.]
[danvet2: oops, forgot to git add after manual patch application]
[danvet3: Appease sparse.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 20:22:37 +07:00
|
|
|
int (*dmabuf_export)(struct drm_i915_gem_object *);
|
|
|
|
void (*release)(struct drm_i915_gem_object *);
|
2012-06-07 21:38:42 +07:00
|
|
|
};
|
|
|
|
|
2014-06-19 04:28:09 +07:00
|
|
|
/*
|
|
|
|
* Frontbuffer tracking bits. Set in obj->frontbuffer_bits while a gem bo is
|
|
|
|
* considered to be the frontbuffer for the given plane interface-wise. This
|
|
|
|
* doesn't mean that the hw necessarily already scans it out, but that any
|
|
|
|
* rendering (by the cpu or gpu) will land in the frontbuffer eventually.
|
|
|
|
*
|
|
|
|
* We have one bit per pipe and per scanout plane type.
|
|
|
|
*/
|
|
|
|
/* Four tracking bits are reserved for each pipe. */
#define INTEL_FRONTBUFFER_BITS_PER_PIPE 4
/* Total width of the tracking mask across all pipes. */
#define INTEL_FRONTBUFFER_BITS \
	(I915_MAX_PIPES * INTEL_FRONTBUFFER_BITS_PER_PIPE)
/* Within a pipe's group: bit 0 primary, bit 1 cursor, bit 2 sprite, bit 3 overlay. */
#define INTEL_FRONTBUFFER_PRIMARY(pipe) \
	(1 << ((pipe) * INTEL_FRONTBUFFER_BITS_PER_PIPE))
#define INTEL_FRONTBUFFER_CURSOR(pipe) \
	(1 << ((INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)) + 1))
#define INTEL_FRONTBUFFER_SPRITE(pipe) \
	(1 << ((INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)) + 2))
#define INTEL_FRONTBUFFER_OVERLAY(pipe) \
	(1 << ((INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)) + 3))
|
2014-06-18 18:59:13 +07:00
|
|
|
/*
 * Mask of every frontbuffer bit belonging to @pipe. The width is derived
 * from INTEL_FRONTBUFFER_BITS_PER_PIPE so the two cannot drift apart.
 */
#define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
	(((1 << INTEL_FRONTBUFFER_BITS_PER_PIPE) - 1) << \
	 (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
|
2014-06-19 04:28:09 +07:00
|
|
|
|
2008-07-31 02:06:12 +07:00
|
|
|
struct drm_i915_gem_object {
|
2010-04-10 02:05:07 +07:00
|
|
|
struct drm_gem_object base;
|
2008-07-31 02:06:12 +07:00
|
|
|
|
2012-06-07 21:38:42 +07:00
|
|
|
const struct drm_i915_gem_object_ops *ops;
|
|
|
|
|
2013-07-18 02:19:03 +07:00
|
|
|
/** List of VMAs backed by this object */
|
|
|
|
struct list_head vma_list;
|
|
|
|
|
2012-11-15 18:32:21 +07:00
|
|
|
/** Stolen memory for this object, instead of being backed by shmem. */
|
|
|
|
struct drm_mm_node *stolen;
|
2013-06-01 01:28:48 +07:00
|
|
|
struct list_head global_list;
|
2008-07-31 02:06:12 +07:00
|
|
|
|
2010-10-19 16:36:51 +07:00
|
|
|
struct list_head ring_list;
|
2013-08-14 16:38:33 +07:00
|
|
|
/** Used in execbuf to temporarily hold a ref */
|
|
|
|
struct list_head obj_exec_link;
|
2008-07-31 02:06:12 +07:00
|
|
|
|
|
|
|
/**
|
2012-07-20 18:41:02 +07:00
|
|
|
* This is set if the object is on the active lists (has pending
|
|
|
|
* rendering and so a non-zero seqno), and is not set if it is on the
|
|
|
|
* inactive (ready to be unbound) list.
|
2008-07-31 02:06:12 +07:00
|
|
|
*/
|
2011-08-17 02:34:10 +07:00
|
|
|
unsigned int active:1;
|
2008-07-31 02:06:12 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* This is set if the object has been written to since last bound
|
|
|
|
* to the GTT
|
|
|
|
*/
|
2011-08-17 02:34:10 +07:00
|
|
|
unsigned int dirty:1;
|
2010-05-13 16:49:44 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Fence register bits (if any) for this object. Will be set
|
|
|
|
* as needed when mapped into the GTT.
|
|
|
|
* Protected by dev->struct_mutex.
|
|
|
|
*/
|
2011-10-10 02:52:02 +07:00
|
|
|
signed int fence_reg:I915_MAX_NUM_FENCE_BITS;
|
2010-05-13 16:49:44 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Advice: are the backing pages purgeable?
|
|
|
|
*/
|
2011-08-17 02:34:10 +07:00
|
|
|
unsigned int madv:2;
|
2010-05-13 16:49:44 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Current tiling mode for the object.
|
|
|
|
*/
|
2011-08-17 02:34:10 +07:00
|
|
|
unsigned int tiling_mode:2;
|
2012-04-21 22:23:23 +07:00
|
|
|
/**
|
|
|
|
* Whether the tiling parameters for the currently associated fence
|
|
|
|
* register have changed. Note that for the purposes of tracking
|
|
|
|
* tiling changes we also treat the unfenced register, the register
|
|
|
|
* slot that the object occupies whilst it executes a fenced
|
|
|
|
* command (such as BLT on gen2/3), as a "fence".
|
|
|
|
*/
|
|
|
|
unsigned int fence_dirty:1;
|
2010-05-13 16:49:44 +07:00
|
|
|
|
2010-11-04 23:11:09 +07:00
|
|
|
/**
|
|
|
|
* Is the object at the current location in the gtt mappable and
|
|
|
|
* fenceable? Used to avoid costly recalculations.
|
|
|
|
*/
|
2011-08-17 02:34:10 +07:00
|
|
|
unsigned int map_and_fenceable:1;
|
2010-11-04 23:11:09 +07:00
|
|
|
|
2010-10-02 03:05:20 +07:00
|
|
|
/**
|
|
|
|
* Whether the current gtt mapping needs to be mappable (and isn't just
|
|
|
|
* mappable by accident). Track pin and fault separate for a more
|
|
|
|
* accurate mappable working set.
|
|
|
|
*/
|
2011-08-17 02:34:10 +07:00
|
|
|
unsigned int fault_mappable:1;
|
|
|
|
unsigned int pin_mappable:1;
|
2013-08-09 18:25:09 +07:00
|
|
|
unsigned int pin_display:1;
|
2010-10-02 03:05:20 +07:00
|
|
|
|
2014-06-17 12:29:42 +07:00
|
|
|
/*
|
|
|
|
* Is the object to be mapped as read-only to the GPU
|
|
|
|
* Only honoured if hardware has relevant pte bit
|
|
|
|
*/
|
|
|
|
unsigned long gt_ro:1;
|
2013-08-08 20:41:10 +07:00
|
|
|
unsigned int cache_level:3;
|
2011-03-30 06:59:50 +07:00
|
|
|
|
2012-06-01 21:20:22 +07:00
|
|
|
unsigned int has_dma_mapping:1;
|
2012-02-09 23:15:47 +07:00
|
|
|
|
2014-06-19 04:28:09 +07:00
|
|
|
unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
|
|
|
|
|
2012-06-01 21:20:22 +07:00
|
|
|
struct sg_table *pages;
|
2012-09-05 03:02:54 +07:00
|
|
|
int pages_pin_count;
|
2008-07-31 02:06:12 +07:00
|
|
|
|
2012-05-10 20:25:09 +07:00
|
|
|
/* prime dma-buf support */
|
2012-05-22 19:09:21 +07:00
|
|
|
void *dma_buf_vmapping;
|
|
|
|
int vmapping_count;
|
|
|
|
|
2014-05-22 20:13:33 +07:00
|
|
|
struct intel_engine_cs *ring;
|
2010-11-12 20:53:37 +07:00
|
|
|
|
2012-04-17 21:31:27 +07:00
|
|
|
/** Breadcrumb of last rendering to the buffer. */
|
2012-07-20 18:41:01 +07:00
|
|
|
uint32_t last_read_seqno;
|
|
|
|
uint32_t last_write_seqno;
|
2010-11-12 20:53:37 +07:00
|
|
|
/** Breadcrumb of last fenced GPU access to the buffer. */
|
|
|
|
uint32_t last_fenced_seqno;
|
2008-07-31 02:06:12 +07:00
|
|
|
|
2010-05-13 16:49:44 +07:00
|
|
|
/** Current tiling stride for the object, if it's tiled. */
|
2008-11-13 01:03:55 +07:00
|
|
|
uint32_t stride;
|
2008-07-31 02:06:12 +07:00
|
|
|
|
2013-10-10 02:23:52 +07:00
|
|
|
/** References from framebuffers, locks out tiling changes. */
|
|
|
|
unsigned long framebuffer_references;
|
|
|
|
|
2009-03-13 06:56:27 +07:00
|
|
|
/** Record of address bit 17 of each page at last unbind. */
|
2010-06-06 21:40:22 +07:00
|
|
|
unsigned long *bit_17;
|
2009-03-13 06:56:27 +07:00
|
|
|
|
DRM: i915: add mode setting support
This commit adds i915 driver support for the DRM mode setting APIs.
Currently, VGA, LVDS, SDVO DVI & VGA, TV and DVO LVDS outputs are
supported. HDMI, DisplayPort and additional SDVO output support will
follow.
Support for the mode setting code is controlled by the new 'modeset'
module option. A new config option, CONFIG_DRM_I915_KMS controls the
default behavior, and whether a PCI ID list is built into the module for
use by user level module utilities.
Note that if mode setting is enabled, user level drivers that access
display registers directly or that don't use the kernel graphics memory
manager will likely corrupt kernel graphics memory, disrupt output
configuration (possibly leading to hangs and/or blank displays), and
prevent panic/oops messages from appearing. So use caution when
enabling this code; be sure your user level code supports the new
interfaces.
A new SysRq key, 'g', provides emergency support for switching back to
the kernel's framebuffer console; which is useful for testing.
Co-authors: Dave Airlie <airlied@linux.ie>, Hong Liu <hong.liu@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2008-11-08 05:24:08 +07:00
|
|
|
/** User space pin count and filp owning the pin */
|
2013-10-10 19:46:37 +07:00
|
|
|
unsigned long user_pin_count;
|
DRM: i915: add mode setting support
This commit adds i915 driver support for the DRM mode setting APIs.
Currently, VGA, LVDS, SDVO DVI & VGA, TV and DVO LVDS outputs are
supported. HDMI, DisplayPort and additional SDVO output support will
follow.
Support for the mode setting code is controlled by the new 'modeset'
module option. A new config option, CONFIG_DRM_I915_KMS controls the
default behavior, and whether a PCI ID list is built into the module for
use by user level module utilities.
Note that if mode setting is enabled, user level drivers that access
display registers directly or that don't use the kernel graphics memory
manager will likely corrupt kernel graphics memory, disrupt output
configuration (possibly leading to hangs and/or blank displays), and
prevent panic/oops messages from appearing. So use caution when
enabling this code; be sure your user level code supports the new
interfaces.
A new SysRq key, 'g', provides emergency support for switching back to
the kernel's framebuffer console; which is useful for testing.
Co-authors: Dave Airlie <airlied@linux.ie>, Hong Liu <hong.liu@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2008-11-08 05:24:08 +07:00
|
|
|
struct drm_file *pin_filp;
|
2008-12-30 17:31:46 +07:00
|
|
|
|
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
By exporting the ability to map user address and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forwarding porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifier
with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
rearrange error path to destroy the mmu_notifier locklessly.
Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
allocations of the same userptr range - and notice that
struct_mutex was presumed to be held when during creation it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the
hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required
infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to
be used to fix up the kernel thread's current->mm for use with
copy_user().
v22: Use sg_alloc_table_from_pages for that chunky feeling
v23: Export a function for sanity checking dma-buf rather than encode
userptr details elsewhere, and clean up comments based on
suggestions by Bradley.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com>
[danvet: Frob ioctl allocation to pick the next one - will cause a bit
of fuss with create2 apparently, but such are the rules.]
[danvet2: oops, forgot to git add after manual patch application]
[danvet3: Appease sparse.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 20:22:37 +07:00
|
|
|
union {
|
2014-11-04 19:51:40 +07:00
|
|
|
/** for phy allocated objects */
|
|
|
|
struct drm_dma_handle *phys_handle;
|
|
|
|
|
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
By exporting the ability to map user address and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forwarding porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifier
with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
rearrange error path to destroy the mmu_notifier locklessly.
Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
allocations of the same userptr range - and notice that
struct_mutex was presumed to be held when during creation it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the
hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required
infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to
be used to fix up the kernel thread's current->mm for use with
copy_user().
v22: Use sg_alloc_table_from_pages for that chunky feeling
v23: Export a function for sanity checking dma-buf rather than encode
userptr details elsewhere, and clean up comments based on
suggestions by Bradley.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com>
[danvet: Frob ioctl allocation to pick the next one - will cause a bit
of fuss with create2 apparently, but such are the rules.]
[danvet2: oops, forgot to git add after manual patch application]
[danvet3: Appease sparse.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 20:22:37 +07:00
|
|
|
struct i915_gem_userptr {
|
|
|
|
uintptr_t ptr;
|
|
|
|
unsigned read_only :1;
|
|
|
|
unsigned workers :4;
|
|
|
|
#define I915_GEM_USERPTR_MAX_WORKERS 15
|
|
|
|
|
2014-08-07 20:20:40 +07:00
|
|
|
struct i915_mm_struct *mm;
|
|
|
|
struct i915_mmu_object *mmu_object;
|
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
By exporting the ability to map user address and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forwarding porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifier
with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
rearrange error path to destroy the mmu_notifier locklessly.
Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
allocations of the same userptr range - and notice that
struct_mutex was presumed to be held when during creation it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the
hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required
infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to
be used to fix up the kernel thread's current->mm for use with
copy_user().
v22: Use sg_alloc_table_from_pages for that chunky feeling
v23: Export a function for sanity checking dma-buf rather than encode
userptr details elsewhere, and clean up comments based on
suggestions by Bradley.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com>
[danvet: Frob ioctl allocation to pick the next one - will cause a bit
of fuss with create2 apparently, but such are the rules.]
[danvet2: oops, forgot to git add after manual patch application]
[danvet3: Appease sparse.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 20:22:37 +07:00
|
|
|
struct work_struct *work;
|
|
|
|
} userptr;
|
|
|
|
};
|
|
|
|
};
|
2010-04-10 02:05:08 +07:00
|
|
|
/* Map a pointer to the embedded base GEM object back to its drm_i915_gem_object. */
#define to_intel_bo(x) \
	container_of((x), struct drm_i915_gem_object, base)
|
2010-03-08 19:35:02 +07:00
|
|
|
|
2014-06-19 04:28:09 +07:00
|
|
|
/*
 * Declared here, defined elsewhere. Takes the old and new GEM objects and a
 * set of frontbuffer bits.
 * NOTE(review): presumably moves the given bits from @old to @new - confirm
 * against the definition.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned int frontbuffer_bits);
|
|
|
|
|
2008-07-31 02:06:12 +07:00
|
|
|
/**
|
|
|
|
* Request queue structure.
|
|
|
|
*
|
|
|
|
* The request queue allows us to note sequence numbers that have been emitted
|
|
|
|
* and may be associated with active buffers to be retired.
|
|
|
|
*
|
|
|
|
* By keeping this list, we can avoid having to do questionable
|
|
|
|
* sequence-number comparisons on buffer last_rendering_seqnos, and associate
|
|
|
|
* an emission time with seqnos for tracking how far ahead of the GPU we are.
|
|
|
|
*/
|
|
|
|
struct drm_i915_gem_request {
|
2010-05-21 08:08:56 +07:00
|
|
|
/** Ring on which this request was generated */
|
2014-05-22 20:13:33 +07:00
|
|
|
struct intel_engine_cs *ring;
|
2010-05-21 08:08:56 +07:00
|
|
|
|
2008-07-31 02:06:12 +07:00
|
|
|
/** GEM sequence number associated with this request. */
|
|
|
|
uint32_t seqno;
|
|
|
|
|
2013-06-12 19:01:39 +07:00
|
|
|
/** Position in the ringbuffer of the start of the request */
|
|
|
|
u32 head;
|
|
|
|
|
|
|
|
/** Position in the ringbuffer of the end of the request */
|
2012-02-15 18:25:36 +07:00
|
|
|
u32 tail;
|
|
|
|
|
2013-05-02 20:48:08 +07:00
|
|
|
/** Context related to this request */
|
2014-05-22 20:13:37 +07:00
|
|
|
struct intel_context *ctx;
|
2013-05-02 20:48:08 +07:00
|
|
|
|
2013-06-12 19:01:39 +07:00
|
|
|
/** Batch buffer related to this request if any */
|
|
|
|
struct drm_i915_gem_object *batch_obj;
|
|
|
|
|
2008-07-31 02:06:12 +07:00
|
|
|
/** Time at which this request was emitted, in jiffies. */
|
|
|
|
unsigned long emitted_jiffies;
|
|
|
|
|
2009-06-03 14:27:35 +07:00
|
|
|
/** global list entry for this request */
|
2008-07-31 02:06:12 +07:00
|
|
|
struct list_head list;
|
2009-06-03 14:27:35 +07:00
|
|
|
|
2010-09-24 22:02:42 +07:00
|
|
|
struct drm_i915_file_private *file_priv;
|
2009-06-03 14:27:35 +07:00
|
|
|
/** file_priv list entry for this request */
|
|
|
|
struct list_head client_list;
|
2008-07-31 02:06:12 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
struct drm_i915_file_private {
|
drm/i915: Boost RPS frequency for CPU stalls
If we encounter a situation where the CPU blocks waiting for results
from the GPU, give the GPU a kick to boost its frequency.
This should work to reduce user interface stalls and to quickly promote
mesa to high frequencies - but the cost is that our requested frequency
stalls high (as we do not idle for long enough before rc6 to start
reducing frequencies, nor are we aggressive at down clocking an
underused GPU). However, this should be mitigated by rc6 itself powering
off the GPU when idle, and that energy use is dependent upon the workload
of the GPU in addition to its frequency (e.g. the math or sampler
functions only consume power when used). Still, this is likely to
adversely affect light workloads.
In particular, this nearly eliminates the highly noticeable wake-up lag
in animations from idle. For example, expose or workspace transitions.
(However, given the situation where we fail to downclock, our requested
frequency is almost always the maximum, except for Baytrail where we
manually downclock upon idling. This often masks the latency of
upclocking after being idle, so animations are typically smooth - at the
cost of increased power consumption.)
Stéphane raised the concern that this will punish good applications and
reward bad applications - but due to the nature of how mesa performs its
client throttling, I believe all mesa applications will be roughly
equally affected. To address this concern, and to prevent applications
like compositors from permanently boosting the RPS state, we ratelimit the
frequency of the wait-boosts each client receives.
Unfortunately, this technique is ineffective with Ironlake - which also
has dynamic render power states and suffers just as dramatically. For
Ironlake, the thermal/power headroom is shared with the CPU through
Intelligent Power Sharing and the intel-ips module. This leaves us with
no GPU boost frequencies available when coming out of idle, and due to
hardware limitations we cannot change the arbitration between the CPU and
GPU quickly enough to be effective.
v2: Limit each client to receiving a single boost for each active period.
Tested by QA to only marginally increase power, and to demonstrably
increase throughput in games. No latency measurements yet.
v3: Cater for front-buffer rendering with manual throttling.
v4: Tidy up.
v5: Sadly the compositor needs frequent boosts as it may never idle, but
due to its picking mechanism (using ReadPixels) may require frequent
waits. Those waits, along with the waits for the vrefresh swap, conspire
to keep the GPU at low frequencies despite the interactive latency. To
overcome this we ditch the one-boost-per-active-period and just ratelimit
the number of wait-boosts each client can receive.
Reported-and-tested-by: Paul Neumann <paul104x@yahoo.de>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68716
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: Owen Taylor <otaylor@redhat.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[danvet: No extern for function prototypes in headers.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-25 23:34:56 +07:00
|
|
|
struct drm_i915_private *dev_priv;
|
2014-02-25 22:11:24 +07:00
|
|
|
struct drm_file *file;
|
drm/i915: Boost RPS frequency for CPU stalls
If we encounter a situation where the CPU blocks waiting for results
from the GPU, give the GPU a kick to boost its frequency.
This should work to reduce user interface stalls and to quickly promote
mesa to high frequencies - but the cost is that our requested frequency
stalls high (as we do not idle for long enough before rc6 to start
reducing frequencies, nor are we aggressive at down clocking an
underused GPU). However, this should be mitigated by rc6 itself powering
off the GPU when idle, and that energy use is dependent upon the workload
of the GPU in addition to its frequency (e.g. the math or sampler
functions only consume power when used). Still, this is likely to
adversely affect light workloads.
In particular, this nearly eliminates the highly noticeable wake-up lag
in animations from idle. For example, expose or workspace transitions.
(However, given the situation where we fail to downclock, our requested
frequency is almost always the maximum, except for Baytrail where we
manually downclock upon idling. This often masks the latency of
upclocking after being idle, so animations are typically smooth - at the
cost of increased power consumption.)
Stéphane raised the concern that this will punish good applications and
reward bad applications - but due to the nature of how mesa performs its
client throttling, I believe all mesa applications will be roughly
equally affected. To address this concern, and to prevent applications
like compositors from permanently boosting the RPS state, we ratelimit the
frequency of the wait-boosts each client receives.
Unfortunately, this technique is ineffective with Ironlake - which also
has dynamic render power states and suffers just as dramatically. For
Ironlake, the thermal/power headroom is shared with the CPU through
Intelligent Power Sharing and the intel-ips module. This leaves us with
no GPU boost frequencies available when coming out of idle, and due to
hardware limitations we cannot change the arbitration between the CPU and
GPU quickly enough to be effective.
v2: Limit each client to receiving a single boost for each active period.
Tested by QA to only marginally increase power, and to demonstrably
increase throughput in games. No latency measurements yet.
v3: Cater for front-buffer rendering with manual throttling.
v4: Tidy up.
v5: Sadly the compositor needs frequent boosts as it may never idle, but
due to its picking mechanism (using ReadPixels) may require frequent
waits. Those waits, along with the waits for the vrefresh swap, conspire
to keep the GPU at low frequencies despite the interactive latency. To
overcome this we ditch the one-boost-per-active-period and just ratelimit
the number of wait-boosts each client can receive.
Reported-and-tested-by: Paul Neumann <paul104x@yahoo.de>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68716
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: Owen Taylor <otaylor@redhat.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[danvet: No extern for function prototypes in headers.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-25 23:34:56 +07:00
|
|
|
|
2008-07-31 02:06:12 +07:00
|
|
|
struct {
|
2012-11-30 03:45:06 +07:00
|
|
|
spinlock_t lock;
|
2009-06-03 14:27:35 +07:00
|
|
|
struct list_head request_list;
|
drm/i915: Boost RPS frequency for CPU stalls
If we encounter a situation where the CPU blocks waiting for results
from the GPU, give the GPU a kick to boost its frequency.
This should work to reduce user interface stalls and to quickly promote
mesa to high frequencies - but the cost is that our requested frequency
stalls high (as we do not idle for long enough before rc6 to start
reducing frequencies, nor are we aggressive at down clocking an
underused GPU). However, this should be mitigated by rc6 itself powering
off the GPU when idle, and that energy use is dependent upon the workload
of the GPU in addition to its frequency (e.g. the math or sampler
functions only consume power when used). Still, this is likely to
adversely affect light workloads.
In particular, this nearly eliminates the highly noticeable wake-up lag
in animations from idle. For example, expose or workspace transitions.
(However, given the situation where we fail to downclock, our requested
frequency is almost always the maximum, except for Baytrail where we
manually downclock upon idling. This often masks the latency of
upclocking after being idle, so animations are typically smooth - at the
cost of increased power consumption.)
Stéphane raised the concern that this will punish good applications and
reward bad applications - but due to the nature of how mesa performs its
client throttling, I believe all mesa applications will be roughly
equally affected. To address this concern, and to prevent applications
like compositors from permanently boosting the RPS state, we ratelimit the
frequency of the wait-boosts each client receives.
Unfortunately, this technique is ineffective with Ironlake - which also
has dynamic render power states and suffers just as dramatically. For
Ironlake, the thermal/power headroom is shared with the CPU through
Intelligent Power Sharing and the intel-ips module. This leaves us with
no GPU boost frequencies available when coming out of idle, and due to
hardware limitations we cannot change the arbitration between the CPU and
GPU quickly enough to be effective.
v2: Limit each client to receiving a single boost for each active period.
Tested by QA to only marginally increase power, and to demonstrably
increase throughput in games. No latency measurements yet.
v3: Cater for front-buffer rendering with manual throttling.
v4: Tidy up.
v5: Sadly the compositor needs frequent boosts as it may never idle, but
due to its picking mechanism (using ReadPixels) may require frequent
waits. Those waits, along with the waits for the vrefresh swap, conspire
to keep the GPU at low frequencies despite the interactive latency. To
overcome this we ditch the one-boost-per-active-period and just ratelimit
the number of wait-boosts each client can receive.
Reported-and-tested-by: Paul Neumann <paul104x@yahoo.de>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68716
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: Owen Taylor <otaylor@redhat.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[danvet: No extern for function prototypes in headers.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-25 23:34:56 +07:00
|
|
|
struct delayed_work idle_work;
|
2008-07-31 02:06:12 +07:00
|
|
|
} mm;
|
2012-06-05 04:42:43 +07:00
|
|
|
struct idr context_idr;
|
2013-06-12 16:35:28 +07:00
|
|
|
|
drm/i915: Boost RPS frequency for CPU stalls
If we encounter a situation where the CPU blocks waiting for results
from the GPU, give the GPU a kick to boost its frequency.
This should work to reduce user interface stalls and to quickly promote
mesa to high frequencies - but the cost is that our requested frequency
stalls high (as we do not idle for long enough before rc6 to start
reducing frequencies, nor are we aggressive at down clocking an
underused GPU). However, this should be mitigated by rc6 itself powering
off the GPU when idle, and that energy use is dependent upon the workload
of the GPU in addition to its frequency (e.g. the math or sampler
functions only consume power when used). Still, this is likely to
adversely affect light workloads.
In particular, this nearly eliminates the highly noticeable wake-up lag
in animations from idle. For example, expose or workspace transitions.
(However, given the situation where we fail to downclock, our requested
frequency is almost always the maximum, except for Baytrail where we
manually downclock upon idling. This often masks the latency of
upclocking after being idle, so animations are typically smooth - at the
cost of increased power consumption.)
Stéphane raised the concern that this will punish good applications and
reward bad applications - but due to the nature of how mesa performs its
client throttling, I believe all mesa applications will be roughly
equally affected. To address this concern, and to prevent applications
like compositors from permanently boosting the RPS state, we ratelimit the
frequency of the wait-boosts each client receives.
Unfortunately, this technique is ineffective with Ironlake - which also
has dynamic render power states and suffers just as dramatically. For
Ironlake, the thermal/power headroom is shared with the CPU through
Intelligent Power Sharing and the intel-ips module. This leaves us with
no GPU boost frequencies available when coming out of idle, and due to
hardware limitations we cannot change the arbitration between the CPU and
GPU quickly enough to be effective.
v2: Limit each client to receiving a single boost for each active period.
Tested by QA to only marginally increase power, and to demonstrably
increase throughput in games. No latency measurements yet.
v3: Cater for front-buffer rendering with manual throttling.
v4: Tidy up.
v5: Sadly the compositor needs frequent boosts as it may never idle, but
due to its picking mechanism (using ReadPixels) may require frequent
waits. Those waits, along with the waits for the vrefresh swap, conspire
to keep the GPU at low frequencies despite the interactive latency. To
overcome this we ditch the one-boost-per-active-period and just ratelimit
the number of wait-boosts each client can receive.
Reported-and-tested-by: Paul Neumann <paul104x@yahoo.de>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68716
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: Owen Taylor <otaylor@redhat.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[danvet: No extern for function prototypes in headers.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-25 23:34:56 +07:00
|
|
|
atomic_t rps_wait_boost;
|
2014-05-22 20:13:33 +07:00
|
|
|
struct intel_engine_cs *bsd_ring;
|
2008-07-31 02:06:12 +07:00
|
|
|
};
|
|
|
|
|
2014-02-19 01:15:46 +07:00
|
|
|
/*
|
|
|
|
* A command that requires special handling by the command parser.
|
|
|
|
*/
|
|
|
|
struct drm_i915_cmd_descriptor {
|
|
|
|
/*
|
|
|
|
* Flags describing how the command parser processes the command.
|
|
|
|
*
|
|
|
|
* CMD_DESC_FIXED: The command has a fixed length if this is set,
|
|
|
|
* a length mask if not set
|
|
|
|
* CMD_DESC_SKIP: The command is allowed but does not follow the
|
|
|
|
* standard length encoding for the opcode range in
|
|
|
|
* which it falls
|
|
|
|
* CMD_DESC_REJECT: The command is never allowed
|
|
|
|
* CMD_DESC_REGISTER: The command should be checked against the
|
|
|
|
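* register whitelist for the appropriate ring
* CMD_DESC_BITMASK: The command should be checked against the
* masked expected values described by bits[]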
|
|
|
|
* CMD_DESC_MASTER: The command is allowed if the submitting process
|
|
|
|
* is the DRM master
|
|
|
|
*/
|
|
|
|
u32 flags;
|
|
|
|
#define CMD_DESC_FIXED (1<<0)
|
|
|
|
#define CMD_DESC_SKIP (1<<1)
|
|
|
|
#define CMD_DESC_REJECT (1<<2)
|
|
|
|
#define CMD_DESC_REGISTER (1<<3)
|
|
|
|
#define CMD_DESC_BITMASK (1<<4)
|
|
|
|
#define CMD_DESC_MASTER (1<<5)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The command's unique identification bits and the bitmask to get them.
|
|
|
|
* This isn't strictly the opcode field as defined in the spec and may
|
|
|
|
* also include type, subtype, and/or subop fields.
|
|
|
|
*/
|
|
|
|
struct {
|
|
|
|
u32 value;
|
|
|
|
u32 mask;
|
|
|
|
} cmd;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The command's length. The command is either fixed length (i.e. does
|
|
|
|
* not include a length field) or has a length field mask. The flag
|
|
|
|
* CMD_DESC_FIXED indicates a fixed length. Otherwise, the command has
|
|
|
|
* a length mask. All command entries in a command table must include
|
|
|
|
* length information.
|
|
|
|
*/
|
|
|
|
union {
|
|
|
|
u32 fixed;
|
|
|
|
u32 mask;
|
|
|
|
} length;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Describes where to find a register address in the command to check
|
|
|
|
* against the ring's register whitelist. Only valid if flags has the
|
|
|
|
* CMD_DESC_REGISTER bit set.
|
|
|
|
*/
|
|
|
|
struct {
|
|
|
|
u32 offset;
|
|
|
|
u32 mask;
|
|
|
|
} reg;
|
|
|
|
|
|
|
|
#define MAX_CMD_DESC_BITMASKS 3
|
|
|
|
/*
|
|
|
|
* Describes command checks where a particular dword is masked and
|
|
|
|
* compared against an expected value. If the command does not match
|
|
|
|
* the expected value, the parser rejects it. Only valid if flags has
|
|
|
|
* the CMD_DESC_BITMASK bit set. Only entries where mask is non-zero
|
|
|
|
* are valid.
|
2014-02-19 01:15:54 +07:00
|
|
|
*
|
|
|
|
* If the check specifies a non-zero condition_mask then the parser
|
|
|
|
* only performs the check when the bits specified by condition_mask
|
|
|
|
* are non-zero.
|
2014-02-19 01:15:46 +07:00
|
|
|
*/
|
|
|
|
struct {
|
|
|
|
u32 offset;
|
|
|
|
u32 mask;
|
|
|
|
u32 expected;
|
2014-02-19 01:15:54 +07:00
|
|
|
u32 condition_offset;
|
|
|
|
u32 condition_mask;
|
2014-02-19 01:15:46 +07:00
|
|
|
} bits[MAX_CMD_DESC_BITMASKS];
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A table of commands requiring special handling by the command parser.
|
|
|
|
*
|
|
|
|
* Each ring has an array of tables. Each table consists of an array of command
|
|
|
|
* descriptors, which must be sorted with command opcodes in ascending order.
|
|
|
|
*/
|
|
|
|
struct drm_i915_cmd_table {
|
|
|
|
/* Array of command descriptors that make up this table. */
const struct drm_i915_cmd_descriptor *table;
|
|
|
|
/* NOTE(review): presumably the number of descriptors in table - confirm. */
int count;
|
|
|
|
};
|
|
|
|
|
2014-08-10 01:18:43 +07:00
|
|
|
/* Note that the (struct drm_i915_private *) cast is just to shut up gcc. */
|
2014-08-13 18:14:12 +07:00
|
|
|
#define __I915__(p) ({ \
|
|
|
|
struct drm_i915_private *__p; \
|
|
|
|
if (__builtin_types_compatible_p(typeof(*p), struct drm_i915_private)) \
|
|
|
|
__p = (struct drm_i915_private *)p; \
|
|
|
|
else if (__builtin_types_compatible_p(typeof(*p), struct drm_device)) \
|
|
|
|
__p = to_i915((struct drm_device *)p); \
|
|
|
|
else \
|
|
|
|
BUILD_BUG(); \
|
|
|
|
__p; \
|
|
|
|
})
|
2014-08-10 01:18:43 +07:00
|
|
|
#define INTEL_INFO(p) (&__I915__(p)->info)
|
2014-08-10 01:18:42 +07:00
|
|
|
#define INTEL_DEVID(p) (INTEL_INFO(p)->device_id)
|
2010-11-09 16:17:32 +07:00
|
|
|
|
2014-08-10 01:18:42 +07:00
|
|
|
#define IS_I830(dev) (INTEL_DEVID(dev) == 0x3577)
|
|
|
|
#define IS_845G(dev) (INTEL_DEVID(dev) == 0x2562)
|
2010-11-09 16:17:32 +07:00
|
|
|
#define IS_I85X(dev) (INTEL_INFO(dev)->is_i85x)
|
2014-08-10 01:18:42 +07:00
|
|
|
#define IS_I865G(dev) (INTEL_DEVID(dev) == 0x2572)
|
2010-11-09 16:17:32 +07:00
|
|
|
#define IS_I915G(dev) (INTEL_INFO(dev)->is_i915g)
|
2014-08-10 01:18:42 +07:00
|
|
|
#define IS_I915GM(dev) (INTEL_DEVID(dev) == 0x2592)
|
|
|
|
#define IS_I945G(dev) (INTEL_DEVID(dev) == 0x2772)
|
2010-11-09 16:17:32 +07:00
|
|
|
#define IS_I945GM(dev) (INTEL_INFO(dev)->is_i945gm)
|
|
|
|
#define IS_BROADWATER(dev) (INTEL_INFO(dev)->is_broadwater)
|
|
|
|
#define IS_CRESTLINE(dev) (INTEL_INFO(dev)->is_crestline)
|
2014-08-10 01:18:42 +07:00
|
|
|
#define IS_GM45(dev) (INTEL_DEVID(dev) == 0x2A42)
|
2010-11-09 16:17:32 +07:00
|
|
|
#define IS_G4X(dev) (INTEL_INFO(dev)->is_g4x)
|
2014-08-10 01:18:42 +07:00
|
|
|
#define IS_PINEVIEW_G(dev) (INTEL_DEVID(dev) == 0xa001)
|
|
|
|
#define IS_PINEVIEW_M(dev) (INTEL_DEVID(dev) == 0xa011)
|
2010-11-09 16:17:32 +07:00
|
|
|
#define IS_PINEVIEW(dev) (INTEL_INFO(dev)->is_pineview)
|
|
|
|
#define IS_G33(dev) (INTEL_INFO(dev)->is_g33)
|
2014-08-10 01:18:42 +07:00
|
|
|
#define IS_IRONLAKE_M(dev) (INTEL_DEVID(dev) == 0x0046)
|
2011-04-29 04:33:09 +07:00
|
|
|
#define IS_IVYBRIDGE(dev) (INTEL_INFO(dev)->is_ivybridge)
|
2014-08-10 01:18:42 +07:00
|
|
|
#define IS_IVB_GT1(dev) (INTEL_DEVID(dev) == 0x0156 || \
|
|
|
|
INTEL_DEVID(dev) == 0x0152 || \
|
|
|
|
INTEL_DEVID(dev) == 0x015a)
|
|
|
|
#define IS_SNB_GT1(dev) (INTEL_DEVID(dev) == 0x0102 || \
|
|
|
|
INTEL_DEVID(dev) == 0x0106 || \
|
|
|
|
INTEL_DEVID(dev) == 0x010A)
|
2012-03-29 03:39:21 +07:00
|
|
|
#define IS_VALLEYVIEW(dev) (INTEL_INFO(dev)->is_valleyview)
|
2014-04-09 17:28:00 +07:00
|
|
|
#define IS_CHERRYVIEW(dev) (INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev))
|
2012-03-29 22:32:18 +07:00
|
|
|
#define IS_HASWELL(dev) (INTEL_INFO(dev)->is_haswell)
|
2014-04-09 17:27:59 +07:00
|
|
|
#define IS_BROADWELL(dev) (!INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev))
|
2014-04-02 12:54:50 +07:00
|
|
|
#define IS_SKYLAKE(dev) (INTEL_INFO(dev)->is_skylake)
|
2010-11-09 16:17:32 +07:00
|
|
|
#define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile)
|
2013-08-13 00:34:08 +07:00
|
|
|
#define IS_HSW_EARLY_SDV(dev) (IS_HASWELL(dev) && \
|
2014-08-10 01:18:42 +07:00
|
|
|
(INTEL_DEVID(dev) & 0xFF00) == 0x0C00)
|
2013-11-09 01:20:06 +07:00
|
|
|
#define IS_BDW_ULT(dev) (IS_BROADWELL(dev) && \
|
2014-08-10 01:18:42 +07:00
|
|
|
((INTEL_DEVID(dev) & 0xf) == 0x2 || \
|
|
|
|
(INTEL_DEVID(dev) & 0xf) == 0x6 || \
|
|
|
|
(INTEL_DEVID(dev) & 0xf) == 0xe))
|
2014-09-20 07:16:26 +07:00
|
|
|
#define IS_BDW_GT3(dev) (IS_BROADWELL(dev) && \
|
|
|
|
(INTEL_DEVID(dev) & 0x00F0) == 0x0020)
|
2013-11-09 01:20:06 +07:00
|
|
|
#define IS_HSW_ULT(dev) (IS_HASWELL(dev) && \
|
2014-08-10 01:18:42 +07:00
|
|
|
(INTEL_DEVID(dev) & 0xFF00) == 0x0A00)
|
2013-08-29 02:45:46 +07:00
|
|
|
#define IS_HSW_GT3(dev) (IS_HASWELL(dev) && \
|
2014-08-10 01:18:42 +07:00
|
|
|
(INTEL_DEVID(dev) & 0x00F0) == 0x0020)
|
2014-04-29 21:00:22 +07:00
|
|
|
/* ULX machines are also considered ULT. */
|
2014-08-10 01:18:42 +07:00
|
|
|
#define IS_HSW_ULX(dev) (INTEL_DEVID(dev) == 0x0A0E || \
|
|
|
|
INTEL_DEVID(dev) == 0x0A1E)
|
2013-08-24 06:00:07 +07:00
|
|
|
#define IS_PRELIMINARY_HW(intel_info) ((intel_info)->is_preliminary)
|
2010-11-09 16:17:32 +07:00
|
|
|
|
2011-04-07 02:11:14 +07:00
|
|
|
/*
|
|
|
|
* The genX designation typically refers to the render engine, so render
|
|
|
|
* capability related checks should use IS_GEN, while display and other checks
|
|
|
|
* have their own (e.g. HAS_PCH_SPLIT for ILK+ display, IS_foo for particular
|
|
|
|
* chips, etc.).
|
|
|
|
*/
|
2010-11-09 16:17:32 +07:00
|
|
|
#define IS_GEN2(dev) (INTEL_INFO(dev)->gen == 2)
|
|
|
|
#define IS_GEN3(dev) (INTEL_INFO(dev)->gen == 3)
|
|
|
|
#define IS_GEN4(dev) (INTEL_INFO(dev)->gen == 4)
|
|
|
|
#define IS_GEN5(dev) (INTEL_INFO(dev)->gen == 5)
|
|
|
|
#define IS_GEN6(dev) (INTEL_INFO(dev)->gen == 6)
|
2011-04-07 02:11:14 +07:00
|
|
|
#define IS_GEN7(dev) (INTEL_INFO(dev)->gen == 7)
|
2013-11-03 11:06:59 +07:00
|
|
|
#define IS_GEN8(dev) (INTEL_INFO(dev)->gen == 8)
|
2013-02-13 22:27:24 +07:00
|
|
|
#define IS_GEN9(dev) (INTEL_INFO(dev)->gen == 9)
|
2010-11-09 16:17:32 +07:00
|
|
|
|
2013-10-16 00:02:57 +07:00
|
|
|
#define RENDER_RING (1<<RCS)
|
|
|
|
#define BSD_RING (1<<VCS)
|
|
|
|
#define BLT_RING (1<<BCS)
|
|
|
|
#define VEBOX_RING (1<<VECS)
|
2014-04-17 09:37:37 +07:00
|
|
|
#define BSD2_RING (1<<VCS2)
|
2014-04-19 04:04:27 +07:00
|
|
|
#define HAS_BSD(dev) (INTEL_INFO(dev)->ring_mask & BSD_RING)
|
2014-04-17 09:37:37 +07:00
|
|
|
#define HAS_BSD2(dev) (INTEL_INFO(dev)->ring_mask & BSD2_RING)
|
2014-04-19 04:04:27 +07:00
|
|
|
#define HAS_BLT(dev) (INTEL_INFO(dev)->ring_mask & BLT_RING)
|
|
|
|
#define HAS_VEBOX(dev) (INTEL_INFO(dev)->ring_mask & VEBOX_RING)
|
|
|
|
#define HAS_LLC(dev) (INTEL_INFO(dev)->has_llc)
|
|
|
|
#define HAS_WT(dev) ((IS_HASWELL(dev) || IS_BROADWELL(dev)) && \
|
2014-08-25 01:35:31 +07:00
|
|
|
__I915__(dev)->ellc_size)
|
2010-11-09 16:17:32 +07:00
|
|
|
#define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws)
|
|
|
|
|
drm/i915: preliminary context support
Very basic code for context setup/destruction in the driver.
Adds the file i915_gem_context.c This file implements HW context
support. On gen5+ a HW context consists of an opaque GPU object which is
referenced at times of context saves and restores. With RC6 enabled,
the context is also referenced as the GPU enters and exits from RC6
(GPU has its own internal power context, except on gen5). Though
something like a context does exist for the media ring, the code only
supports contexts for the render ring.
In software, there is a distinction between contexts created by the
user, and the default HW context. The default HW context is used by GPU
clients that do not request setup of their own hardware context. The
default context's state is never restored to help prevent programming
errors. This would happen if a client ran and piggy-backed off another
client's GPU state. The default context only exists to give the GPU some
offset to load as the current to invoke a save of the context we
actually care about. In fact, the code could likely be constructed,
albeit in a more complicated fashion, to never use the default context,
though that limits the driver's ability to swap out, and/or destroy
other contexts.
All other contexts are created as a request by the GPU client. These
contexts store GPU state, and thus allow GPU clients to not re-emit
state (and potentially query certain state) at any time. The kernel
driver makes certain that the appropriate commands are inserted.
There are 4 entry points into the contexts, init, fini, open, close.
The names are self-explanatory except that init can be called during
reset, and also during pm thaw/resume. As we expect our context to be
preserved across these events, we do not reinitialize in this case.
As Adam Jackson pointed out, the cutoff of 1MB where a HW context is
considered too big is arbitrary. The reason for this is even though
context sizes are increasing with every generation, they have yet to
eclipse even 32k. If we somehow read back way more than that, it
probably means BIOS has done something strange, or we're running on a
platform that wasn't designed for this.
v2: rename load/unload to init/fini (daniel)
remove ILK support for get_size() (indirectly daniel)
add HAS_HW_CONTEXTS macro to clarify supported platforms (daniel)
added comments (Ben)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
2012-06-05 04:42:42 +07:00
|
|
|
#define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6)
|
2014-07-24 23:04:49 +07:00
|
|
|
#define HAS_LOGICAL_RING_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 8)
|
2014-08-05 21:51:18 +07:00
|
|
|
#define USES_PPGTT(dev) (i915.enable_ppgtt)
|
|
|
|
#define USES_FULL_PPGTT(dev) (i915.enable_ppgtt == 2)
|
2012-02-09 23:15:46 +07:00
|
|
|
|
2010-11-09 02:18:58 +07:00
|
|
|
#define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay)
|
2010-11-09 16:17:32 +07:00
|
|
|
#define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical)
|
|
|
|
|
2012-12-17 22:21:27 +07:00
|
|
|
/* Early gen2 have a totally busted CS tlb and require pinned batches. */
|
|
|
|
#define HAS_BROKEN_CS_TLB(dev) (IS_I830(dev) || IS_845G(dev))
|
2014-02-07 22:33:20 +07:00
|
|
|
/*
|
|
|
|
* dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts
|
|
|
|
* even when in MSI mode. This results in spurious interrupt warnings if the
|
|
|
|
* legacy irq no. is shared with another device. The kernel then disables that
|
|
|
|
* interrupt source and so prevents the other device from working properly.
|
|
|
|
*/
|
|
|
|
#define HAS_AUX_IRQ(dev) (INTEL_INFO(dev)->gen >= 5)
|
|
|
|
#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->gen >= 5)
|
2012-12-17 22:21:27 +07:00
|
|
|
|
2010-11-09 16:17:32 +07:00
|
|
|
/* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
|
|
|
|
* rows, which changed the alignment requirements and fence programming.
|
|
|
|
*/
|
|
|
|
#define HAS_128_BYTE_Y_TILING(dev) (!IS_GEN2(dev) && !(IS_I915G(dev) || \
|
|
|
|
IS_I915GM(dev)))
|
|
|
|
#define SUPPORTS_DIGITAL_OUTPUTS(dev) (!IS_GEN2(dev) && !IS_PINEVIEW(dev))
|
|
|
|
#define SUPPORTS_INTEGRATED_HDMI(dev) (IS_G4X(dev) || IS_GEN5(dev))
|
|
|
|
#define SUPPORTS_INTEGRATED_DP(dev) (IS_G4X(dev) || IS_GEN5(dev))
|
|
|
|
#define SUPPORTS_TV(dev) (INTEL_INFO(dev)->supports_tv)
|
|
|
|
#define I915_HAS_HOTPLUG(dev) (INTEL_INFO(dev)->has_hotplug)
|
|
|
|
|
|
|
|
#define HAS_FW_BLC(dev) (INTEL_INFO(dev)->gen > 2)
|
|
|
|
#define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr)
|
2014-01-10 14:50:12 +07:00
|
|
|
#define HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc)
|
2010-11-09 16:17:32 +07:00
|
|
|
|
2014-10-02 02:04:14 +07:00
|
|
|
#define HAS_IPS(dev) (IS_HSW_ULT(dev) || IS_BROADWELL(dev))
|
2013-06-25 00:29:34 +07:00
|
|
|
|
2013-04-23 00:40:39 +07:00
|
|
|
#define HAS_DDI(dev) (INTEL_INFO(dev)->has_ddi)
|
2013-04-23 00:40:41 +07:00
|
|
|
#define HAS_FPGA_DBG_UNCLAIMED(dev) (INTEL_INFO(dev)->has_fpga_dbg)
|
2014-11-20 18:44:37 +07:00
|
|
|
#define HAS_PSR(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev) || \
|
|
|
|
IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
|
2014-03-08 06:12:37 +07:00
|
|
|
#define HAS_RUNTIME_PM(dev) (IS_GEN6(dev) || IS_HASWELL(dev) || \
|
2014-04-15 00:41:30 +07:00
|
|
|
IS_BROADWELL(dev) || IS_VALLEYVIEW(dev))
|
2014-10-07 21:06:50 +07:00
|
|
|
#define HAS_RC6(dev) (INTEL_INFO(dev)->gen >= 6)
|
|
|
|
#define HAS_RC6p(dev) (INTEL_INFO(dev)->gen == 6 || IS_IVYBRIDGE(dev))
|
2012-11-24 00:30:39 +07:00
|
|
|
|
2012-11-21 00:12:07 +07:00
|
|
|
#define INTEL_PCH_DEVICE_ID_MASK 0xff00
|
|
|
|
#define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00
|
|
|
|
#define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00
|
|
|
|
#define INTEL_PCH_PPT_DEVICE_ID_TYPE 0x1e00
|
|
|
|
#define INTEL_PCH_LPT_DEVICE_ID_TYPE 0x8c00
|
|
|
|
#define INTEL_PCH_LPT_LP_DEVICE_ID_TYPE 0x9c00
|
2014-04-09 12:38:57 +07:00
|
|
|
#define INTEL_PCH_SPT_DEVICE_ID_TYPE 0xA100
|
|
|
|
#define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00
|
2012-11-21 00:12:07 +07:00
|
|
|
|
2014-08-25 01:35:31 +07:00
|
|
|
#define INTEL_PCH_TYPE(dev) (__I915__(dev)->pch_type)
|
2014-04-09 12:38:57 +07:00
|
|
|
#define HAS_PCH_SPT(dev) (INTEL_PCH_TYPE(dev) == PCH_SPT)
|
2012-03-29 22:32:20 +07:00
|
|
|
#define HAS_PCH_LPT(dev) (INTEL_PCH_TYPE(dev) == PCH_LPT)
|
2010-11-09 16:17:32 +07:00
|
|
|
#define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT)
|
|
|
|
#define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX)
|
2013-04-06 03:12:40 +07:00
|
|
|
#define HAS_PCH_NOP(dev) (INTEL_PCH_TYPE(dev) == PCH_NOP)
|
2012-07-04 01:57:32 +07:00
|
|
|
#define HAS_PCH_SPLIT(dev) (INTEL_PCH_TYPE(dev) != PCH_NONE)
|
2010-11-09 16:17:32 +07:00
|
|
|
|
2014-07-21 16:53:38 +07:00
|
|
|
#define HAS_GMCH_DISPLAY(dev) (INTEL_INFO(dev)->gen < 5 || IS_VALLEYVIEW(dev))
|
|
|
|
|
2013-09-20 01:01:40 +07:00
|
|
|
/* DPF == dynamic parity feature */
|
|
|
|
#define HAS_L3_DPF(dev) (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
|
|
|
|
#define NUM_L3_SLICES(dev) (IS_HSW_GT3(dev) ? 2 : HAS_L3_DPF(dev))
|
2012-07-25 10:47:31 +07:00
|
|
|
|
2012-09-08 09:43:39 +07:00
|
|
|
#define GT_FREQUENCY_MULTIPLIER 50
|
|
|
|
|
2010-11-09 02:18:58 +07:00
|
|
|
#include "i915_trace.h"
|
|
|
|
|
2013-08-03 00:27:49 +07:00
|
|
|
extern const struct drm_ioctl_desc i915_ioctls[];
|
2005-09-30 15:37:36 +07:00
|
|
|
extern int i915_max_ioctl;
|
|
|
|
|
2014-10-23 23:23:27 +07:00
|
|
|
extern int i915_suspend_legacy(struct drm_device *dev, pm_message_t state);
|
|
|
|
extern int i915_resume_legacy(struct drm_device *dev);
|
2008-11-28 11:22:24 +07:00
|
|
|
extern int i915_master_create(struct drm_device *dev, struct drm_master *master);
|
|
|
|
extern void i915_master_destroy(struct drm_device *dev, struct drm_master *master);
|
|
|
|
|
2014-01-21 16:24:25 +07:00
|
|
|
/* i915_params.c */
|
|
|
|
struct i915_params {
|
|
|
|
int modeset;
|
|
|
|
int panel_ignore_lid;
|
|
|
|
unsigned int powersave;
|
|
|
|
int semaphores;
|
|
|
|
unsigned int lvds_downclock;
|
|
|
|
int lvds_channel_mode;
|
|
|
|
int panel_use_ssc;
|
|
|
|
int vbt_sdvo_panel_type;
|
|
|
|
int enable_rc6;
|
|
|
|
int enable_fbc;
|
|
|
|
int enable_ppgtt;
|
2014-07-24 23:04:11 +07:00
|
|
|
int enable_execlists;
|
2014-01-21 16:24:25 +07:00
|
|
|
int enable_psr;
|
|
|
|
unsigned int preliminary_hw_support;
|
|
|
|
int disable_power_well;
|
|
|
|
int enable_ips;
|
2014-02-08 02:12:53 +07:00
|
|
|
int invert_brightness;
|
2014-02-19 01:15:46 +07:00
|
|
|
int enable_cmd_parser;
|
2014-02-08 02:12:53 +07:00
|
|
|
/* leave bools at the end to not create holes */
|
|
|
|
bool enable_hangcheck;
|
|
|
|
bool fastboot;
|
2014-01-21 16:24:25 +07:00
|
|
|
bool prefault_disable;
|
|
|
|
bool reset;
|
2014-02-11 00:20:55 +07:00
|
|
|
bool disable_display;
|
2014-04-01 14:33:47 +07:00
|
|
|
bool disable_vtd_wa;
|
drm/i915: Replaced Blitter ring based flips with MMIO flips
This patch enables the framework for using MMIO based flip calls,
in contrast with the CS based flip calls which are being used currently.
MMIO based flip calls can be enabled on architectures where
Render and Blitter engines reside in different power wells. The
decision to use MMIO flips can be made based on workloads to give
100% residency for Media power well.
v2: The MMIO flips now use the interrupt driven mechanism for issuing the
flips when target seqno is reached. (Incorporating Ville's idea)
v3: Rebasing on latest code. Code restructuring after incorporating
Damien's comments
v4: Addressing Ville's review comments
-general cleanup
-updating only base addr instead of calling update_primary_plane
-extending patch for gen5+ platforms
v5: Addressed Ville's review comments
-Making mmio flip vs cs flip selection based on module parameter
-Adding check for DRIVER_MODESET feature in notify_ring before calling
notify mmio flip.
-Other changes mostly in function arguments
v6: -Having a separate function to check condition for using mmio flips (Ville)
-propagating error code from i915_gem_check_olr (Ville)
v7: -Adding __must_check with i915_gem_check_olr (Chris)
-Renaming mmio_flip_data to mmio_flip (Chris)
-Rebasing on latest nightly
v8: -Rebasing on latest code
-squash 3rd patch in series(mmio setbase vs page flip race) with this patch
-Added new tiling mode update in intel_do_mmio_flip (Chris)
v9: -check for obj->last_write_seqno being 0 instead of obj->ring being NULL in
intel_postpone_flip, as this is a more restrictive condition (Chris)
v10: -Applied Chris's suggestions for squashing patches 2,3 into this patch.
These patches make the selection of CS vs MMIO flip at the page flip time, and
make the module parameter for using mmio flips as tristate, the states being
'force CS flips', 'force mmio flips', 'driver discretion'.
Changed the logic for driver discretion (Chris)
v11: Minor code cleanup(better readability, fixing whitespace errors, using
lockdep to check mutex locked status in postpone_flip, removal of __must_check
in function definition) (Chris)
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Tested-by: Chris Wilson <chris@chris-wilson.co.uk> # snb, ivb
[danvet: Fix up parameter alignment checkpatch spotted.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-06-02 18:17:17 +07:00
|
|
|
int use_mmio_flip;
|
drm/i915: reorganize the unclaimed register detection code
The current code only runs when we do an I915_WRITE operation. It
checks if the unclaimed register flag is set before we do the
operation, and then it checks it again after we do the operation. This
double check allows us to find out if the I915_WRITE operation in
question is the bad one, or if some previous code is the bad one. When
it finds a problem, our code uses DRM_ERROR to signal it.
The good thing about the current code is that it detects the problem,
so at least we can know we did something wrong. The problem is that
even though we find the problem, we don't really have much information
to actually debug it. So whenever I see one of these DRM_ERROR
messages on my systems, the first thing I do is apply a patch to
change the DRM_ERROR to a WARN and also check for unclaimed registers
on I915_READ operations. This local patch makes things even slower,
but it usually helps a lot in finding the bad code.
The first point here is that since the current code is only useful to
detect whether we have a problem or not, but it is not really good to
find the cause of the problem, I don't think we should be checking
both before and after every I915_WRITE operation: just doing the check
once should be enough for us to quickly detect problems. With this
change, the code that runs by default for every single user will only
do 1 read operation for every single I915_WRITE, instead of 2. This
patch does this change.
The second point is that the local patch I have should be upstream,
but since it makes things slower it should be disabled by default. So
I added the i915.mmio_debug option to enable it.
So after this patch, this is what will happen:
- By default, we will try to detect unclaimed registers once after
every I915_WRITE operation. Previously we tried twice for every
I915_WRITE.
- When we find an unclaimed register we will still print a DRM_ERROR
message, but we will now tell the user to try again with
i915.mmio_debug=1.
- When we use i915.mmio_debug=1 we will try to find unclaimed
registers both before and after every I915_READ and I915_WRITE
operation, and we will print stack traces in case we find them.
This should really help locating the exact point of the bad code
(or at least finding out that i915.ko is not the problem).
This commit also opens space for really-slow register debugging
operations on other platforms. In theory we can now add lots and lots
of debug code behind i915.mmio_debug, enable this option on our tests,
and catch more problems.
v2: - Remove not-so-useful comments (Daniel)
- Fix the param definition macros (Rodrigo)
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-07-17 03:49:29 +07:00
|
|
|
bool mmio_debug;
|
2014-01-21 16:24:25 +07:00
|
|
|
};
|
|
|
|
extern struct i915_params i915 __read_mostly;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* i915_dma.c */
|
2005-11-10 18:16:34 +07:00
|
|
|
extern int i915_driver_load(struct drm_device *, unsigned long flags);
|
2007-11-22 11:14:14 +07:00
|
|
|
extern int i915_driver_unload(struct drm_device *);
|
2014-06-27 00:23:52 +07:00
|
|
|
extern int i915_driver_open(struct drm_device *dev, struct drm_file *file);
|
2007-07-11 12:53:27 +07:00
|
|
|
extern void i915_driver_lastclose(struct drm_device * dev);
|
2007-08-25 17:23:09 +07:00
|
|
|
extern void i915_driver_preclose(struct drm_device *dev,
|
2014-06-27 00:23:52 +07:00
|
|
|
struct drm_file *file);
|
2008-07-31 02:06:12 +07:00
|
|
|
extern void i915_driver_postclose(struct drm_device *dev,
|
2014-06-27 00:23:52 +07:00
|
|
|
struct drm_file *file);
|
2007-07-11 12:53:27 +07:00
|
|
|
extern int i915_driver_device_is_agp(struct drm_device * dev);
|
2012-04-17 04:07:40 +07:00
|
|
|
#ifdef CONFIG_COMPAT
|
2006-01-02 16:14:23 +07:00
|
|
|
extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
|
|
|
|
unsigned long arg);
|
2012-04-17 04:07:40 +07:00
|
|
|
#endif
|
2012-06-05 04:42:56 +07:00
|
|
|
extern int intel_gpu_reset(struct drm_device *dev);
|
2012-04-27 20:17:44 +07:00
|
|
|
extern int i915_reset(struct drm_device *dev);
|
2010-05-21 04:28:11 +07:00
|
|
|
extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
|
|
|
|
extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
|
|
|
|
extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
|
|
|
|
extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
|
2014-04-18 20:35:02 +07:00
|
|
|
int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
|
2014-08-18 18:42:44 +07:00
|
|
|
void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
|
2010-05-21 04:28:11 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* i915_irq.c */
|
2013-07-03 21:22:08 +07:00
|
|
|
void i915_queue_hangcheck(struct drm_device *dev);
|
2014-02-25 22:11:26 +07:00
|
|
|
__printf(3, 4)
|
|
|
|
void i915_handle_error(struct drm_device *dev, bool wedged,
|
|
|
|
const char *fmt, ...);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2014-09-30 15:56:44 +07:00
|
|
|
extern void intel_irq_init(struct drm_i915_private *dev_priv);
|
|
|
|
extern void intel_hpd_init(struct drm_i915_private *dev_priv);
|
2014-09-30 15:56:43 +07:00
|
|
|
int intel_irq_install(struct drm_i915_private *dev_priv);
|
|
|
|
void intel_irq_uninstall(struct drm_i915_private *dev_priv);
|
2013-07-20 02:36:52 +07:00
|
|
|
|
|
|
|
extern void intel_uncore_sanitize(struct drm_device *dev);
|
2014-06-06 16:59:39 +07:00
|
|
|
extern void intel_uncore_early_sanitize(struct drm_device *dev,
|
|
|
|
bool restore_forcewake);
|
2013-07-20 02:36:52 +07:00
|
|
|
extern void intel_uncore_init(struct drm_device *dev);
|
|
|
|
extern void intel_uncore_check_errors(struct drm_device *dev);
|
2013-08-26 19:46:09 +07:00
|
|
|
extern void intel_uncore_fini(struct drm_device *dev);
|
2014-06-12 22:35:45 +07:00
|
|
|
extern void intel_uncore_forcewake_reset(struct drm_device *dev, bool restore);
|
2011-04-07 02:13:38 +07:00
|
|
|
|
2008-11-04 17:03:27 +07:00
|
|
|
void
|
2014-03-31 18:27:21 +07:00
|
|
|
i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
|
2014-02-10 23:42:47 +07:00
|
|
|
u32 status_mask);
|
2008-11-04 17:03:27 +07:00
|
|
|
|
|
|
|
void
|
2014-03-31 18:27:21 +07:00
|
|
|
i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
|
2014-02-10 23:42:47 +07:00
|
|
|
u32 status_mask);
|
2008-11-04 17:03:27 +07:00
|
|
|
|
2014-03-05 00:23:07 +07:00
|
|
|
void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv);
|
|
|
|
void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv);
|
2014-09-30 15:56:46 +07:00
|
|
|
void
|
|
|
|
ironlake_enable_display_irq(struct drm_i915_private *dev_priv, u32 mask);
|
|
|
|
void
|
|
|
|
ironlake_disable_display_irq(struct drm_i915_private *dev_priv, u32 mask);
|
|
|
|
void ibx_display_interrupt_update(struct drm_i915_private *dev_priv,
|
|
|
|
uint32_t interrupt_mask,
|
|
|
|
uint32_t enabled_irq_mask);
|
|
|
|
#define ibx_enable_display_interrupt(dev_priv, bits) \
|
|
|
|
ibx_display_interrupt_update((dev_priv), (bits), (bits))
|
|
|
|
#define ibx_disable_display_interrupt(dev_priv, bits) \
|
|
|
|
ibx_display_interrupt_update((dev_priv), (bits), 0)
|
2014-03-05 00:23:07 +07:00
|
|
|
|
2008-07-31 02:06:12 +07:00
|
|
|
/* i915_gem.c */
|
|
|
|
int i915_gem_create_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
|
|
|
int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
|
|
|
int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
|
|
|
int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
2008-11-13 01:03:55 +07:00
|
|
|
int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
2008-07-31 02:06:12 +07:00
|
|
|
int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
|
|
|
int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
2014-07-24 23:04:33 +07:00
|
|
|
void i915_gem_execbuffer_move_to_active(struct list_head *vmas,
|
|
|
|
struct intel_engine_cs *ring);
|
|
|
|
void i915_gem_execbuffer_retire_commands(struct drm_device *dev,
|
|
|
|
struct drm_file *file,
|
|
|
|
struct intel_engine_cs *ring,
|
|
|
|
struct drm_i915_gem_object *obj);
|
2014-07-24 23:04:21 +07:00
|
|
|
int i915_gem_ringbuffer_submission(struct drm_device *dev,
|
|
|
|
struct drm_file *file,
|
|
|
|
struct intel_engine_cs *ring,
|
|
|
|
struct intel_context *ctx,
|
|
|
|
struct drm_i915_gem_execbuffer2 *args,
|
|
|
|
struct list_head *vmas,
|
|
|
|
struct drm_i915_gem_object *batch_obj,
|
|
|
|
u64 exec_start, u32 flags);
|
2008-07-31 02:06:12 +07:00
|
|
|
int i915_gem_execbuffer(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
2009-12-18 10:05:42 +07:00
|
|
|
int i915_gem_execbuffer2(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
2008-07-31 02:06:12 +07:00
|
|
|
int i915_gem_pin_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
|
|
|
int i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
|
|
|
int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
2012-09-22 07:01:20 +07:00
|
|
|
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file);
|
|
|
|
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file);
|
2008-07-31 02:06:12 +07:00
|
|
|
int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
2009-09-14 22:50:29 +07:00
|
|
|
int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
2008-07-31 02:06:12 +07:00
|
|
|
int i915_gem_set_tiling(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
|
|
|
int i915_gem_get_tiling(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
By exporting the ability to map user address and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forward porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifier
with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
rearrange error path to destroy the mmu_notifier locklessly.
Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
allocations of the same userptr range - and notice that
struct_mutex was presumed to be held when during creation it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the
hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required
infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to
be used to fix up the kernel thread's current->mm for use with
copy_user().
v22: Use sg_alloc_table_from_pages for that chunky feeling
v23: Export a function for sanity checking dma-buf rather than encode
userptr details elsewhere, and clean up comments based on
suggestions by Bradley.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com>
[danvet: Frob ioctl allocation to pick the next one - will cause a bit
of fuss with create2 apparently, but such are the rules.]
[danvet2: oops, forgot to git add after manual patch application]
[danvet3: Appease sparse.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 20:22:37 +07:00
|
|
|
int i915_gem_init_userptr(struct drm_device *dev);
|
|
|
|
int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file);
|
2008-10-23 11:40:13 +07:00
|
|
|
int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
2012-05-25 05:03:10 +07:00
|
|
|
int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
2008-07-31 02:06:12 +07:00
|
|
|
void i915_gem_load(struct drm_device *dev);
|
2014-09-09 17:16:08 +07:00
|
|
|
unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
|
|
|
|
long target,
|
|
|
|
unsigned flags);
|
|
|
|
#define I915_SHRINK_PURGEABLE 0x1
|
|
|
|
#define I915_SHRINK_UNBOUND 0x2
|
|
|
|
#define I915_SHRINK_BOUND 0x4
|
2012-11-15 18:32:30 +07:00
|
|
|
void *i915_gem_object_alloc(struct drm_device *dev);
|
|
|
|
void i915_gem_object_free(struct drm_i915_gem_object *obj);
|
2012-06-07 21:38:42 +07:00
|
|
|
void i915_gem_object_init(struct drm_i915_gem_object *obj,
|
|
|
|
const struct drm_i915_gem_object_ops *ops);
|
2010-11-09 02:18:58 +07:00
|
|
|
struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
|
|
|
|
size_t size);
|
2013-12-07 05:11:26 +07:00
|
|
|
void i915_init_vm(struct drm_i915_private *dev_priv,
|
|
|
|
struct i915_address_space *vm);
|
2008-07-31 02:06:12 +07:00
|
|
|
void i915_gem_free_object(struct drm_gem_object *obj);
|
2013-07-18 02:19:03 +07:00
|
|
|
void i915_gem_vma_destroy(struct i915_vma *vma);
|
2012-11-15 18:32:30 +07:00
|
|
|
|
2014-02-14 20:01:11 +07:00
|
|
|
#define PIN_MAPPABLE 0x1
|
|
|
|
#define PIN_NONBLOCK 0x2
|
2014-02-14 20:01:12 +07:00
|
|
|
#define PIN_GLOBAL 0x4
|
drm/i915: Prevent negative relocation deltas from wrapping
This is pure evil. Userspace, I'm looking at you SNA, repacks batch
buffers on the fly after generation as they are being passed to the
kernel for execution. These batches also contain self-referenced
relocations as a single buffer encompasses the state commands, kernels,
vertices and sampler. During generation the buffers are placed at known
offsets within the full batch, and then the relocation deltas (as passed
to the kernel) are tweaked as the batch is repacked into a smaller buffer.
This means that userspace is passing negative relocations deltas, which
subsequently wrap to large values if the batch is at a low address. The
GPU hangs when it then tries to use the large value as a base for its
address offsets, rather than wrapping back to the real value (as one
would hope). As the GPU uses positive offsets from the base, we can
treat the relocation address as the minimum address read by the GPU.
For the upper bound, we trust that userspace will not read beyond the
end of the buffer.
So, how do we fix negative relocations from wrapping? We can either
check that every relocation looks valid when we write it, and then
position each object such that we prevent the offset wraparound, or we
just special-case the self-referential behaviour of SNA and force all
batches to be above 256k. Daniel prefers the latter approach.
This fixes a GPU hang when it tries to use an address (relocation +
offset) greater than the GTT size. The issue would occur quite easily
with full-ppgtt as each fd gets its own VM space, so low offsets would
often be handed out. However, with the rearrangement of the low GTT due
to capturing the BIOS framebuffer, it is already affecting kernels 3.15
onwards. I think only IVB+ is susceptible to this bug, but the workaround
should only kick in rarely, so it seems sensible to always apply it.
v3: Use a bias for batch buffers to prevent small negative delta relocations
from wrapping.
v4 from Daniel:
- s/BIAS/BATCH_OFFSET_BIAS/
- Extract eb_vma_misplaced/i915_vma_misplaced since the conditions
were growing rather cumbersome.
- Add a comment to eb_get_batch explaining why we do this.
- Apply the batch offset bias everywhere but mention that we've only
observed it on gen7 gpus.
- Drop PIN_OFFSET_FIX for now, that slipped in from a feature patch.
v5: Add static to eb_get_batch, spotted by 0-day tester.
Testcase: igt/gem_bad_reloc
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78533
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v3)
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-23 13:48:08 +07:00
|
|
|
#define PIN_OFFSET_BIAS 0x8
|
|
|
|
#define PIN_OFFSET_MASK (~4095)
|
2010-11-23 22:26:33 +07:00
|
|
|
int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
|
2013-08-01 06:59:58 +07:00
|
|
|
struct i915_address_space *vm,
|
2010-11-23 22:26:33 +07:00
|
|
|
uint32_t alignment,
|
drm/i915: Prevent negative relocation deltas from wrapping
This is pure evil. Userspace, I'm looking at you SNA, repacks batch
buffers on the fly after generation as they are being passed to the
kernel for execution. These batches also contain self-referenced
relocations as a single buffer encompasses the state commands, kernels,
vertices and sampler. During generation the buffers are placed at known
offsets within the full batch, and then the relocation deltas (as passed
to the kernel) are tweaked as the batch is repacked into a smaller buffer.
This means that userspace is passing negative relocation deltas, which
subsequently wrap to large values if the batch is at a low address. The
GPU hangs when it then tries to use the large value as a base for its
address offsets, rather than wrapping back to the real value (as one
would hope). As the GPU uses positive offsets from the base, we can
treat the relocation address as the minimum address read by the GPU.
For the upper bound, we trust that userspace will not read beyond the
end of the buffer.
So, how do we fix negative relocations from wrapping? We can either
check that every relocation looks valid when we write it, and then
position each object such that we prevent the offset wraparound, or we
just special-case the self-referential behaviour of SNA and force all
batches to be above 256k. Daniel prefers the latter approach.
This fixes a GPU hang when it tries to use an address (relocation +
offset) greater than the GTT size. The issue would occur quite easily
with full-ppgtt as each fd gets its own VM space, so low offsets would
often be handed out. However, with the rearrangement of the low GTT due
to capturing the BIOS framebuffer, it is already affecting kernels 3.15
onwards. I think only IVB+ is susceptible to this bug, but the workaround
should only kick in rarely, so it seems sensible to always apply it.
v3: Use a bias for batch buffers to prevent small negative delta relocations
from wrapping.
v4 from Daniel:
- s/BIAS/BATCH_OFFSET_BIAS/
- Extract eb_vma_misplaced/i915_vma_misplaced since the conditions
were growing rather cumbersome.
- Add a comment to eb_get_batch explaining why we do this.
- Apply the batch offset bias everywhere but mention that we've only
observed it on gen7 gpus.
- Drop PIN_OFFSET_FIX for now, that slipped in from a feature patch.
v5: Add static to eb_get_batch, spotted by 0-day tester.
Testcase: igt/gem_bad_reloc
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78533
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v3)
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-23 13:48:08 +07:00
|
|
|
uint64_t flags);
|
drm/i915: plumb VM into bind/unbind code
As alluded to in several patches, and it will be reiterated later... A
VMA is an abstraction for a GEM BO bound into an address space.
Therefore it stands to reason, that the existing bind, and unbind are
the ones which will be the most impacted. This patch implements this,
and updates all callers which weren't already updated in the series
(because it was too messy).
This patch represents the bulk of an earlier, larger patch. I've pulled
out a bunch of things by the request of Daniel. The history is preserved
for posterity with the email convention of ">" One big change from the
original patch aside from a bunch of cropping is I've created an
i915_vma_unbind() function. That is because we always have the VMA
anyway, and doing an extra lookup is useful. There is a caveat, we
retain an i915_gem_object_ggtt_unbind, for the global cases which might
not talk in VMAs.
> drm/i915: plumb VM into object operations
>
> This patch was formerly known as:
> "drm/i915: Create VMAs (part 3) - plumbing"
>
> This patch adds a VM argument, bind/unbind, and the object
> offset/size/color getters/setters. It preserves the old ggtt helper
> functions because things still need, and will continue to need them.
>
> Some code will still need to be ported over after this.
>
> v2: Fix purge to pick an object and unbind all vmas
> This was doable because of the global bound list change.
>
> v3: With the commit to actually pin/unpin pages in place, there is no
> longer a need to check if unbind succeeded before calling put_pages().
> Make put_pages only BUG() after checking pin count.
>
> v4: Rebased on top of the new hangcheck work by Mika
> plumbed eb_destroy also
> Many checkpatch related fixes
>
> v5: Very large rebase
>
> v6:
> Change BUG_ON to WARN_ON (Daniel)
> Rename vm to ggtt in preallocate stolen, since it is always ggtt when
> dealing with stolen memory. (Daniel)
> list_for_each will short-circuit already (Daniel)
> remove superfluous space (Daniel)
> Use per object list of vmas (Daniel)
> Make obj_bound_any() use obj_bound for each vm (Ben)
> s/bind_to_gtt/bind_to_vm/ (Ben)
>
> Fixed up the inactive shrinker. As Daniel noticed the code could
> potentially count the same object multiple times. While it's not
> possible in the current case, since 1 object can only ever be bound into
> 1 address space thus far - we may as well try to get something more
> future proof in place now. With a prep patch before this to switch over
> to using the bound list + inactive check, we're now able to carry that
> forward for every address space an object is bound into.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
[danvet: Rebase on top of the loss of "drm/i915: Cleanup more of VMA
in destroy".]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-08-01 07:00:10 +07:00
|
|
|
int __must_check i915_vma_unbind(struct i915_vma *vma);
|
2013-01-15 19:39:35 +07:00
|
|
|
int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
|
2013-12-14 00:22:31 +07:00
|
|
|
void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
|
2010-11-09 02:18:58 +07:00
|
|
|
void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
|
2010-09-24 22:02:42 +07:00
|
|
|
|
2014-02-19 01:15:45 +07:00
|
|
|
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
|
|
|
|
int *needs_clflush);
|
|
|
|
|
2012-06-07 21:38:42 +07:00
|
|
|
int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
|
2012-06-01 21:20:22 +07:00
|
|
|
/*
 * Return one page of the object's backing store, selected by the index n.
 *
 * A scatterlist page iterator is run over obj->pages->sgl (bounded by
 * obj->pages->nents) and the very first page it produces is returned
 * straight out of the loop body. If the iterator produces nothing, the
 * function falls through and returns NULL.
 *
 * obj->pages is dereferenced unconditionally, so the caller must have the
 * page list populated before calling.
 *
 * NOTE(review): assumes the last argument of for_each_sg_page() is the
 * starting page offset, i.e. that n selects the n-th page - confirm
 * against the scatterlist API.
 */
static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
|
|
|
|
{
|
2013-02-19 00:28:02 +07:00
|
|
|
struct sg_page_iter sg_iter;
|
|
|
|
|
|
|
|
for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, n)
|
2013-03-26 20:14:18 +07:00
|
|
|
return sg_page_iter_page(&sg_iter);
|
2013-02-19 00:28:02 +07:00
|
|
|
|
|
|
|
return NULL;
|
2012-06-01 21:20:22 +07:00
|
|
|
}
|
2012-09-05 03:02:54 +07:00
|
|
|
/*
 * Take one pin on the object's page list by incrementing
 * obj->pages_pin_count.
 *
 * The page list must already exist: the function BUG()s if obj->pages is
 * NULL. No locking is performed here.
 */
static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
|
|
|
|
{
|
|
|
|
BUG_ON(obj->pages == NULL);
|
|
|
|
obj->pages_pin_count++;
|
|
|
|
}
|
|
|
|
/*
 * Drop one pin on the object's page list by decrementing
 * obj->pages_pin_count.
 *
 * Unbalanced calls are treated as fatal: the function BUG()s if the pin
 * count is already zero. No locking is performed here.
 */
static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
|
|
|
|
{
|
|
|
|
BUG_ON(obj->pages_pin_count == 0);
|
|
|
|
obj->pages_pin_count--;
|
|
|
|
}
|
|
|
|
|
2010-11-26 01:00:26 +07:00
|
|
|
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
|
2012-04-06 04:47:36 +07:00
|
|
|
int i915_gem_object_sync(struct drm_i915_gem_object *obj,
|
2014-05-22 20:13:33 +07:00
|
|
|
struct intel_engine_cs *to);
|
2013-09-24 23:57:58 +07:00
|
|
|
void i915_vma_move_to_active(struct i915_vma *vma,
|
2014-05-22 20:13:33 +07:00
|
|
|
struct intel_engine_cs *ring);
|
2011-02-07 09:16:14 +07:00
|
|
|
int i915_gem_dumb_create(struct drm_file *file_priv,
|
|
|
|
struct drm_device *dev,
|
|
|
|
struct drm_mode_create_dumb *args);
|
2014-11-20 15:56:25 +07:00
|
|
|
int i915_gem_dumb_map_offset(struct drm_file *file_priv,
|
|
|
|
struct drm_device *dev, uint32_t handle,
|
|
|
|
uint64_t *offset);
|
2010-09-24 22:02:42 +07:00
|
|
|
/**
|
|
|
|
* Returns true if seq1 is at or after seq2 (equal seqnos count as passed).
* The comparison is made on the signed 32-bit difference, so it stays
* correct across seqno wraparound as long as the two values are less than
* 2^31 apart.
|
|
|
|
*/
|
|
|
|
static inline bool
|
|
|
|
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
|
|
|
|
{
|
|
|
|
/* unsigned subtraction wraps; the sign of the result gives the ordering */
return (int32_t)(seq1 - seq2) >= 0;
|
|
|
|
}
|
|
|
|
|
2012-12-19 16:13:08 +07:00
|
|
|
int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
|
|
|
|
int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
|
2012-04-17 21:31:24 +07:00
|
|
|
int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
|
2010-11-10 23:40:20 +07:00
|
|
|
int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
|
2010-11-23 22:26:33 +07:00
|
|
|
|
2014-05-13 17:11:26 +07:00
|
|
|
bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
|
|
|
|
void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
|
2011-12-14 19:57:08 +07:00
|
|
|
|
2014-02-25 22:11:23 +07:00
|
|
|
struct drm_i915_gem_request *
|
2014-05-22 20:13:33 +07:00
|
|
|
i915_gem_find_active_request(struct intel_engine_cs *ring);
|
2014-02-25 22:11:23 +07:00
|
|
|
|
drm/i915: Boost RPS frequency for CPU stalls
If we encounter a situation where the CPU blocks waiting for results
from the GPU, give the GPU a kick to boost its the frequency.
This should work to reduce user interface stalls and to quickly promote
mesa to high frequencies - but the cost is that our requested frequency
stalls high (as we do not idle for long enough before rc6 to start
reducing frequencies, nor are we aggressive at down clocking an
underused GPU). However, this should be mitigated by rc6 itself powering
off the GPU when idle, and that energy use is dependent upon the workload
of the GPU in addition to its frequency (e.g. the math or sampler
functions only consume power when used). Still, this is likely to
adversely affect light workloads.
In particular, this nearly eliminates the highly noticeable wake-up lag
in animations from idle. For example, expose or workspace transitions.
(However, given the situation where we fail to downclock, our requested
frequency is almost always the maximum, except for Baytrail where we
manually downclock upon idling. This often masks the latency of
upclocking after being idle, so animations are typically smooth - at the
cost of increased power consumption.)
Stéphane raised the concern that this will punish good applications and
reward bad applications - but due to the nature of how mesa performs its
client throttling, I believe all mesa applications will be roughly
equally affected. To address this concern, and to prevent applications
like compositors from permanently boosting the RPS state, we ratelimit the
frequency of the wait-boosts each client receives.
Unfortunately, this technique is ineffective with Ironlake - which also
has dynamic render power states and suffers just as dramatically. For
Ironlake, the thermal/power headroom is shared with the CPU through
Intelligent Power Sharing and the intel-ips module. This leaves us with
no GPU boost frequencies available when coming out of idle, and due to
hardware limitations we cannot change the arbitration between the CPU and
GPU quickly enough to be effective.
v2: Limit each client to receiving a single boost for each active period.
Tested by QA to only marginally increase power, and to demonstrably
increase throughput in games. No latency measurements yet.
v3: Cater for front-buffer rendering with manual throttling.
v4: Tidy up.
v5: Sadly the compositor needs frequent boosts as it may never idle, but
due to its picking mechanism (using ReadPixels) may require frequent
waits. Those waits, along with the waits for the vrefresh swap, conspire
to keep the GPU at low frequencies despite the interactive latency. To
overcome this we ditch the one-boost-per-active-period and just ratelimit
the number of wait-boosts each client can receive.
Reported-and-tested-by: Paul Neumann <paul104x@yahoo.de>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68716
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: Owen Taylor <otaylor@redhat.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[danvet: No extern for function prototypes in headers.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-25 23:34:56 +07:00
|
|
|
bool i915_gem_retire_requests(struct drm_device *dev);
|
2014-05-22 20:13:33 +07:00
|
|
|
void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
|
2012-11-14 23:14:05 +07:00
|
|
|
int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
|
drm/i915: non-interruptible sleeps can't handle -EAGAIN
So don't return -EAGAIN, even in the case of a gpu hang. Remap it to
-EIO instead. Note that this isn't really an issue with
interruptibility, but more that we have quite a few codepaths (mostly
around kms stuff) that simply can't handle any errors and hence not
even -EAGAIN. Instead of adding proper failure paths so that we could
restart these ioctls we've opted for the cheap way out of sleeping
non-interruptibly. Which works everywhere but when the gpu dies,
which this patch fixes.
So essentially interruptible == false means 'wait for the gpu or die
trying'.
This patch is a bit ugly because intel_ring_begin is all non-interruptible
and hence only returns -EIO. But as the comment in there says,
auditing all the callsites would be a pain.
To avoid duplicating code, reuse i915_gem_check_wedge in __wait_seqno
and intel_wait_ring_buffer. Also use the opportunity to clarify the
different cases in i915_gem_check_wedge a bit with comments.
v2: Don't access dev_priv->mm.interruptible from check_wedge - we
might not hold dev->struct_mutex, making this racy. Instead pass
interruptible in as a parameter. I've noticed this because I've hit a
BUG_ON(!mutex_is_locked) at the top of check_wedge. This has been
added in
commit b4aca0106c466b5a0329318203f65bac2d91b682
Author: Ben Widawsky <ben@bwidawsk.net>
Date: Wed Apr 25 20:50:12 2012 -0700
drm/i915: extract some common olr+wedge code
although that commit is missing any justification for this. I guess
it's just copy&paste, because the same commit adds the same BUG_ON
check to check_olr, where it indeed makes sense.
But in check_wedge everything we access is protected by other means,
so this is superfluous. And because it now gets in the way (we add a
new caller in __wait_seqno, which can be called without
dev->struct_mutex) let's just remove it.
v3: Group all the i915_gem_check_wedge refactoring into this patch, so
that this patch here is all about not returning -EAGAIN to callsites
that can't handle syscall restarting.
v4: Add clarification of what interruptible == false means in our code,
requested by Ben Widawsky.
v5: Fix EAGAIN misspelling noticed by Chris Wilson.
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-07-05 03:54:13 +07:00
|
|
|
bool interruptible);
|
drm/i915: Replaced Blitter ring based flips with MMIO flips
This patch enables the framework for using MMIO based flip calls,
in contrast with the CS based flip calls which are being used currently.
MMIO based flip calls can be enabled on architectures where
Render and Blitter engines reside in different power wells. The
decision to use MMIO flips can be made based on workloads to give
100% residency for Media power well.
v2: The MMIO flips now use the interrupt driven mechanism for issuing the
flips when target seqno is reached. (Incorporating Ville's idea)
v3: Rebasing on latest code. Code restructuring after incorporating
Damien's comments
v4: Addressing Ville's review comments
-general cleanup
-updating only base addr instead of calling update_primary_plane
-extending patch for gen5+ platforms
v5: Addressed Ville's review comments
-Making mmio flip vs cs flip selection based on module parameter
-Adding check for DRIVER_MODESET feature in notify_ring before calling
notify mmio flip.
-Other changes mostly in function arguments
v6: -Having a separate function to check condition for using mmio flips (Ville)
-propagating error code from i915_gem_check_olr (Ville)
v7: -Adding __must_check with i915_gem_check_olr (Chris)
-Renaming mmio_flip_data to mmio_flip (Chris)
-Rebasing on latest nightly
v8: -Rebasing on latest code
-squash 3rd patch in series(mmio setbase vs page flip race) with this patch
-Added new tiling mode update in intel_do_mmio_flip (Chris)
v9: -check for obj->last_write_seqno being 0 instead of obj->ring being NULL in
intel_postpone_flip, as this is a more restrictive condition (Chris)
v10: -Applied Chris's suggestions for squashing patches 2,3 into this patch.
These patches make the selection of CS vs MMIO flip at the page flip time, and
make the module parameter for using mmio flips as tristate, the states being
'force CS flips', 'force mmio flips', 'driver discretion'.
Changed the logic for driver discretion (Chris)
v11: Minor code cleanup(better readability, fixing whitespace errors, using
lockdep to check mutex locked status in postpone_flip, removal of __must_check
in function definition) (Chris)
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Tested-by: Chris Wilson <chris@chris-wilson.co.uk> # snb, ivb
[danvet: Fix up parameter alignment checkpatch spotted.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-06-02 18:17:17 +07:00
|
|
|
int __must_check i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno);
|
|
|
|
|
2012-11-15 23:17:22 +07:00
|
|
|
static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
|
|
|
|
{
|
|
|
|
return unlikely(atomic_read(&error->reset_counter)
|
2013-11-12 19:44:19 +07:00
|
|
|
& (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
|
2012-11-15 23:17:22 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
|
|
|
|
{
|
2013-11-12 19:44:19 +07:00
|
|
|
return atomic_read(&error->reset_counter) & I915_WEDGED;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline u32 i915_reset_count(struct i915_gpu_error *error)
|
|
|
|
{
|
|
|
|
return ((atomic_read(&error->reset_counter) & ~I915_WEDGED) + 1) / 2;
|
2012-11-15 23:17:22 +07:00
|
|
|
}
|
2012-02-15 18:25:36 +07:00
|
|
|
|
2014-03-28 23:18:18 +07:00
|
|
|
static inline bool i915_stop_ring_allow_ban(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
|
|
|
return dev_priv->gpu_error.stop_rings == 0 ||
|
|
|
|
dev_priv->gpu_error.stop_rings & I915_STOP_RING_ALLOW_BAN;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
|
|
|
return dev_priv->gpu_error.stop_rings == 0 ||
|
|
|
|
dev_priv->gpu_error.stop_rings & I915_STOP_RING_ALLOW_WARN;
|
|
|
|
}
|
|
|
|
|
2010-09-30 22:53:18 +07:00
|
|
|
void i915_gem_reset(struct drm_device *dev);
|
2013-08-08 20:41:09 +07:00
|
|
|
bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
|
2011-04-14 04:04:09 +07:00
|
|
|
int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
|
2012-04-24 21:47:41 +07:00
|
|
|
int __must_check i915_gem_init(struct drm_device *dev);
|
2014-07-24 23:04:21 +07:00
|
|
|
int i915_gem_init_rings(struct drm_device *dev);
|
2012-02-02 15:58:12 +07:00
|
|
|
int __must_check i915_gem_init_hw(struct drm_device *dev);
|
2014-05-22 20:13:33 +07:00
|
|
|
int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice);
|
2012-02-02 15:58:12 +07:00
|
|
|
void i915_gem_init_swizzling(struct drm_device *dev);
|
DRM: i915: add mode setting support
This commit adds i915 driver support for the DRM mode setting APIs.
Currently, VGA, LVDS, SDVO DVI & VGA, TV and DVO LVDS outputs are
supported. HDMI, DisplayPort and additional SDVO output support will
follow.
Support for the mode setting code is controlled by the new 'modeset'
module option. A new config option, CONFIG_DRM_I915_KMS controls the
default behavior, and whether a PCI ID list is built into the module for
use by user level module utilities.
Note that if mode setting is enabled, user level drivers that access
display registers directly or that don't use the kernel graphics memory
manager will likely corrupt kernel graphics memory, disrupt output
configuration (possibly leading to hangs and/or blank displays), and
prevent panic/oops messages from appearing. So use caution when
enabling this code; be sure your user level code supports the new
interfaces.
A new SysRq key, 'g', provides emergency support for switching back to
the kernel's framebuffer console; which is useful for testing.
Co-authors: Dave Airlie <airlied@linux.ie>, Hong Liu <hong.liu@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2008-11-08 05:24:08 +07:00
|
|
|
void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
|
2012-04-27 06:02:58 +07:00
|
|
|
int __must_check i915_gpu_idle(struct drm_device *dev);
|
2013-10-16 17:50:01 +07:00
|
|
|
int __must_check i915_gem_suspend(struct drm_device *dev);
|
2014-05-22 20:13:33 +07:00
|
|
|
int __i915_add_request(struct intel_engine_cs *ring,
|
2013-06-12 16:35:30 +07:00
|
|
|
struct drm_file *file,
|
2013-06-12 19:01:39 +07:00
|
|
|
struct drm_i915_gem_object *batch_obj,
|
2013-06-12 16:35:30 +07:00
|
|
|
u32 *seqno);
|
|
|
|
/*
 * Convenience wrapper around __i915_add_request() that passes NULL for
 * both the file and the batch object arguments.
 */
#define i915_add_request(ring, seqno) \
	__i915_add_request((ring), NULL, NULL, (seqno))
|
2014-11-06 14:26:38 +07:00
|
|
|
int __i915_wait_seqno(struct intel_engine_cs *ring, u32 seqno,
|
|
|
|
unsigned reset_counter,
|
|
|
|
bool interruptible,
|
|
|
|
s64 *timeout,
|
|
|
|
struct drm_i915_file_private *file_priv);
|
2014-05-22 20:13:33 +07:00
|
|
|
int __must_check i915_wait_seqno(struct intel_engine_cs *ring,
|
2012-05-25 05:03:11 +07:00
|
|
|
uint32_t seqno);
|
2008-11-13 01:03:55 +07:00
|
|
|
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
|
2010-11-23 22:26:33 +07:00
|
|
|
int __must_check
|
|
|
|
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
|
|
|
|
bool write);
|
|
|
|
int __must_check
|
2012-03-26 15:10:27 +07:00
|
|
|
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
|
|
|
|
int __must_check
|
2011-04-14 15:41:17 +07:00
|
|
|
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
|
|
|
|
u32 alignment,
|
2014-05-22 20:13:33 +07:00
|
|
|
struct intel_engine_cs *pipelined);
|
2013-08-09 18:25:09 +07:00
|
|
|
void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj);
|
2014-05-21 18:42:56 +07:00
|
|
|
int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
|
2010-08-07 17:01:39 +07:00
|
|
|
int align);
|
drm/i915: Boost RPS frequency for CPU stalls
If we encounter a situation where the CPU blocks waiting for results
from the GPU, give the GPU a kick to boost its frequency.
This should work to reduce user interface stalls and to quickly promote
mesa to high frequencies - but the cost is that our requested frequency
stalls high (as we do not idle for long enough before rc6 to start
reducing frequencies, nor are we aggressive at down clocking an
underused GPU). However, this should be mitigated by rc6 itself powering
off the GPU when idle, and that energy use is dependent upon the workload
of the GPU in addition to its frequency (e.g. the math or sampler
functions only consume power when used). Still, this is likely to
adversely affect light workloads.
In particular, this nearly eliminates the highly noticeable wake-up lag
in animations from idle. For example, expose or workspace transitions.
(However, given the situation where we fail to downclock, our requested
frequency is almost always the maximum, except for Baytrail where we
manually downclock upon idling. This often masks the latency of
upclocking after being idle, so animations are typically smooth - at the
cost of increased power consumption.)
Stéphane raised the concern that this will punish good applications and
reward bad applications - but due to the nature of how mesa performs its
client throttling, I believe all mesa applications will be roughly
equally affected. To address this concern, and to prevent applications
like compositors from permanently boosting the RPS state, we ratelimit the
frequency of the wait-boosts each client receives.
Unfortunately, this technique is ineffective with Ironlake - which also
has dynamic render power states and suffers just as dramatically. For
Ironlake, the thermal/power headroom is shared with the CPU through
Intelligent Power Sharing and the intel-ips module. This leaves us with
no GPU boost frequencies available when coming out of idle, and due to
hardware limitations we cannot change the arbitration between the CPU and
GPU quickly enough to be effective.
v2: Limit each client to receiving a single boost for each active period.
Tested by QA to only marginally increase power, and to demonstrably
increase throughput in games. No latency measurements yet.
v3: Cater for front-buffer rendering with manual throttling.
v4: Tidy up.
v5: Sadly the compositor needs frequent boosts as it may never idle, but
due to its picking mechanism (using ReadPixels) may require frequent
waits. Those waits, along with the waits for the vrefresh swap, conspire
to keep the GPU at low frequencies despite the interactive latency. To
overcome this we ditch the one-boost-per-active-period and just ratelimit
the number of wait-boosts each client can receive.
Reported-and-tested-by: Paul Neumann <paul104x@yahoo.de>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68716
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: Owen Taylor <otaylor@redhat.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[danvet: No extern for function prototypes in headers.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-25 23:34:56 +07:00
|
|
|
int i915_gem_open(struct drm_device *dev, struct drm_file *file);
|
2010-11-09 02:18:58 +07:00
|
|
|
void i915_gem_release(struct drm_device *dev, struct drm_file *file);
|
2008-07-31 02:06:12 +07:00
|
|
|
|
2013-01-08 02:47:35 +07:00
|
|
|
uint32_t
|
|
|
|
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode);
|
2011-03-07 17:42:03 +07:00
|
|
|
uint32_t
|
2013-01-08 02:47:33 +07:00
|
|
|
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
|
|
|
|
int tiling_mode, bool fenced);
|
2011-03-07 17:42:03 +07:00
|
|
|
|
2011-04-04 15:44:39 +07:00
|
|
|
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
|
|
|
|
enum i915_cache_level cache_level);
|
|
|
|
|
2012-05-10 20:25:09 +07:00
|
|
|
struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
|
|
|
|
struct dma_buf *dma_buf);
|
|
|
|
|
|
|
|
struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
|
|
|
|
struct drm_gem_object *gem_obj, int flags);
|
|
|
|
|
2013-06-12 16:15:12 +07:00
|
|
|
void i915_gem_restore_fences(struct drm_device *dev);
|
|
|
|
|
2013-08-01 06:59:56 +07:00
|
|
|
unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
|
|
|
|
struct i915_address_space *vm);
|
|
|
|
bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o);
|
|
|
|
bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
|
|
|
|
struct i915_address_space *vm);
|
|
|
|
unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
|
|
|
|
struct i915_address_space *vm);
|
|
|
|
struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
|
|
|
|
struct i915_address_space *vm);
|
2013-08-14 16:38:35 +07:00
|
|
|
struct i915_vma *
|
|
|
|
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
|
|
|
|
struct i915_address_space *vm);
|
2013-09-24 23:57:57 +07:00
|
|
|
|
|
|
|
struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj);
|
2013-12-07 05:10:55 +07:00
|
|
|
static inline bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) {
|
|
|
|
struct i915_vma *vma;
|
|
|
|
list_for_each_entry(vma, &obj->vma_list, vma_link)
|
|
|
|
if (vma->pin_count > 0)
|
|
|
|
return true;
|
|
|
|
return false;
|
|
|
|
}
|
2013-09-24 23:57:57 +07:00
|
|
|
|
2013-08-01 06:59:56 +07:00
|
|
|
/* Some GGTT VM helpers */
|
2014-08-06 20:04:49 +07:00
|
|
|
/*
 * Resolve an object to the address of the gtt.base member of the
 * drm_i915_private reached through obj->base.dev->dev_private.
 */
#define i915_obj_to_ggtt(obj) \
	(&((struct drm_i915_private *) \
	   (obj)->base.dev->dev_private)->gtt.base)
|
|
|
|
static inline bool i915_is_ggtt(struct i915_address_space *vm)
|
|
|
|
{
|
|
|
|
struct i915_address_space *ggtt =
|
|
|
|
&((struct drm_i915_private *)(vm)->dev->dev_private)->gtt.base;
|
|
|
|
return vm == ggtt;
|
|
|
|
}
|
|
|
|
|
2014-08-06 20:04:48 +07:00
|
|
|
static inline struct i915_hw_ppgtt *
|
|
|
|
i915_vm_to_ppgtt(struct i915_address_space *vm)
|
|
|
|
{
|
|
|
|
WARN_ON(i915_is_ggtt(vm));
|
|
|
|
|
|
|
|
return container_of(vm, struct i915_hw_ppgtt, base);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-08-01 06:59:56 +07:00
|
|
|
/*
 * Shorthand for i915_gem_obj_bound() using the address space returned
 * by i915_obj_to_ggtt() for this object.
 */
static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
{
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);

	return i915_gem_obj_bound(obj, ggtt);
}
|
|
|
|
|
|
|
|
/*
 * Shorthand for i915_gem_obj_offset() using the address space returned
 * by i915_obj_to_ggtt() for this object.
 */
static inline unsigned long
i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *obj)
{
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);

	return i915_gem_obj_offset(obj, ggtt);
}
|
|
|
|
|
|
|
|
/*
 * Shorthand for i915_gem_obj_size() using the address space returned
 * by i915_obj_to_ggtt() for this object.
 */
static inline unsigned long
i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj)
{
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);

	return i915_gem_obj_size(obj, ggtt);
}
|
2013-08-01 06:59:58 +07:00
|
|
|
|
|
|
|
static inline int __must_check
|
|
|
|
i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
|
|
|
|
uint32_t alignment,
|
2014-02-14 20:01:11 +07:00
|
|
|
unsigned flags)
|
2013-08-01 06:59:58 +07:00
|
|
|
{
|
2014-08-06 20:04:49 +07:00
|
|
|
return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj),
|
|
|
|
alignment, flags | PIN_GLOBAL);
|
2013-08-01 06:59:58 +07:00
|
|
|
}
|
2013-08-01 06:59:56 +07:00
|
|
|
|
2014-02-14 20:01:19 +07:00
|
|
|
/*
 * Unbind the VMA that i915_gem_obj_to_ggtt() returns for this object and
 * hand back the result of i915_vma_unbind().
 */
static inline int
i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
{
	struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);

	return i915_vma_unbind(vma);
}
|
|
|
|
|
|
|
|
void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
|
|
|
|
|
drm/i915: preliminary context support
Very basic code for context setup/destruction in the driver.
Adds the file i915_gem_context.c This file implements HW context
support. On gen5+ a HW context consists of an opaque GPU object which is
referenced at times of context saves and restores. With RC6 enabled,
the context is also referenced as the GPU enters and exits from RC6
(GPU has its own internal power context, except on gen5). Though
something like a context does exist for the media ring, the code only
supports contexts for the render ring.
In software, there is a distinction between contexts created by the
user, and the default HW context. The default HW context is used by GPU
clients that do not request setup of their own hardware context. The
default context's state is never restored to help prevent programming
errors. This would happen if a client ran and piggy-backed off another
client's GPU state. The default context only exists to give the GPU some
offset to load as the current to invoke a save of the context we
actually care about. In fact, the code could likely be constructed,
albeit in a more complicated fashion, to never use the default context,
though that limits the driver's ability to swap out, and/or destroy
other contexts.
All other contexts are created as a request by the GPU client. These
contexts store GPU state, and thus allow GPU clients to not re-emit
state (and potentially query certain state) at any time. The kernel
driver makes certain that the appropriate commands are inserted.
There are 4 entry points into the contexts, init, fini, open, close.
The names are self-explanatory except that init can be called during
reset, and also during pm thaw/resume. As we expect our context to be
preserved across these events, we do not reinitialize in this case.
As Adam Jackson pointed out, The cutoff of 1MB where a HW context is
considered too big is arbitrary. The reason for this is even though
context sizes are increasing with every generation, they have yet to
eclipse even 32k. If we somehow read back way more than that, it
probably means BIOS has done something strange, or we're running on a
platform that wasn't designed for this.
v2: rename load/unload to init/fini (daniel)
remove ILK support for get_size() (indirectly daniel)
add HAS_HW_CONTEXTS macro to clarify supported platforms (daniel)
added comments (Ben)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
2012-06-05 04:42:42 +07:00
|
|
|
/* i915_gem_context.c */
|
2013-11-06 22:56:29 +07:00
|
|
|
int __must_check i915_gem_context_init(struct drm_device *dev);
|
drm/i915: preliminary context support
Very basic code for context setup/destruction in the driver.
Adds the file i915_gem_context.c This file implements HW context
support. On gen5+ a HW context consists of an opaque GPU object which is
referenced at times of context saves and restores. With RC6 enabled,
the context is also referenced as the GPU enters and exits from RC6
(GPU has its own internal power context, except on gen5). Though
something like a context does exist for the media ring, the code only
supports contexts for the render ring.
In software, there is a distinction between contexts created by the
user, and the default HW context. The default HW context is used by GPU
clients that do not request setup of their own hardware context. The
default context's state is never restored to help prevent programming
errors. This would happen if a client ran and piggy-backed off another
client's GPU state. The default context only exists to give the GPU some
offset to load as the current to invoke a save of the context we
actually care about. In fact, the code could likely be constructed,
albeit in a more complicated fashion, to never use the default context,
though that limits the driver's ability to swap out, and/or destroy
other contexts.
All other contexts are created as a request by the GPU client. These
contexts store GPU state, and thus allow GPU clients to not re-emit
state (and potentially query certain state) at any time. The kernel
driver makes certain that the appropriate commands are inserted.
There are 4 entry points into the contexts, init, fini, open, close.
The names are self-explanatory except that init can be called during
reset, and also during pm thaw/resume. As we expect our context to be
preserved across these events, we do not reinitialize in this case.
As Adam Jackson pointed out, The cutoff of 1MB where a HW context is
considered too big is arbitrary. The reason for this is even though
context sizes are increasing with every generation, they have yet to
eclipse even 32k. If we somehow read back way more than that, it
probably means BIOS has done something strange, or we're running on a
platform that wasn't designed for this.
v2: rename load/unload to init/fini (daniel)
remove ILK support for get_size() (indirectly daniel)
add HAS_HW_CONTEXTS macro to clarify supported platforms (daniel)
added comments (Ben)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
2012-06-05 04:42:42 +07:00
|
|
|
void i915_gem_context_fini(struct drm_device *dev);
|
2013-12-07 05:11:03 +07:00
|
|
|
void i915_gem_context_reset(struct drm_device *dev);
|
2013-12-07 05:10:58 +07:00
|
|
|
int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
|
drm/i915: Split context enabling from init
We **need** to do this for exactly 1 reason, because we want to embed a
PPGTT into the context, but we don't want to special case the default
context.
To achieve that, we must be able to initialize contexts after the GTT is
setup (so we can allocate and pin the default context's BO), but before
the PPGTT and rings are initialized. This is because, currently, context
initialization requires ring usage. We don't have rings until after the
GTT is setup. If we split the enabling part of context initialization,
the part requiring the ringbuffer, we can untangle this, and then later
embed the PPGTT
Incidentally this allows us to also adhere to the original design of
context init/fini in future patches: they were only ever meant to be
called at driver load and unload.
v2: Move hw_contexts_disabled test in i915_gem_context_enable() (Chris)
v3: BUG_ON after checking for disabled contexts. Or else it blows up pre
gen6 (Ben)
v4: Forward port
Modified enable for each ring, since that patch is earlier in the series
Dropped ring arg from create_default_context so it can be used by others
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 05:11:04 +07:00
|
|
|
int i915_gem_context_enable(struct drm_i915_private *dev_priv);
|
drm/i915: preliminary context support
Very basic code for context setup/destruction in the driver.
Adds the file i915_gem_context.c This file implements HW context
support. On gen5+ a HW context consists of an opaque GPU object which is
referenced at times of context saves and restores. With RC6 enabled,
the context is also referenced as the GPU enters and exists from RC6
(GPU has it's own internal power context, except on gen5). Though
something like a context does exist for the media ring, the code only
supports contexts for the render ring.
In software, there is a distinction between contexts created by the
user, and the default HW context. The default HW context is used by GPU
clients that do not request setup of their own hardware context. The
default context's state is never restored to help prevent programming
errors. This would happen if a client ran and piggy-backed off another
client's GPU state. The default context only exists to give the GPU some
offset to load as the current to invoke a save of the context we
actually care about. In fact, the code could likely be constructed,
albeit in a more complicated fashion, to never use the default context,
though that limits the driver's ability to swap out, and/or destroy
other contexts.
All other contexts are created as a request by the GPU client. These
contexts store GPU state, and thus allow GPU clients to not re-emit
state (and potentially query certain state) at any time. The kernel
driver makes certain that the appropriate commands are inserted.
There are 4 entry points into the contexts, init, fini, open, close.
The names are self-explanatory except that init can be called during
reset, and also during pm thaw/resume. As we expect our context to be
preserved across these events, we do not reinitialize in this case.
As Adam Jackson pointed out, The cutoff of 1MB where a HW context is
considered too big is arbitrary. The reason for this is even though
context sizes are increasing with every generation, they have yet to
eclipse even 32k. If we somehow read back way more than that, it
probably means BIOS has done something strange, or we're running on a
platform that wasn't designed for this.
v2: rename load/unload to init/fini (daniel)
remove ILK support for get_size() (indirectly daniel)
add HAS_HW_CONTEXTS macro to clarify supported platforms (daniel)
added comments (Ben)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
2012-06-05 04:42:42 +07:00
|
|
|
void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
|
2014-05-22 20:13:33 +07:00
|
|
|
int i915_switch_context(struct intel_engine_cs *ring,
|
2014-05-22 20:13:37 +07:00
|
|
|
struct intel_context *to);
|
|
|
|
struct intel_context *
|
drm/i915: Get context early in execbuf
We need to have the address space when reserving space for the objects.
Since the address space and context are tied together, and reserve
occurs before context switch (for good reason), we must lookup our
context earlier in the process.
This leaves some room for optimizations where we no longer need to use
ctx_id in certain places. This will be addressed in a subsequent patch.
Important tricky bit:
Because slow relocations during execbuffer drop struct_mutex
Perhaps it would be best to acquire the reference when we get the
context, but I'll save that for another day (note I have written the
patch before, and I found the changes required to be uglier than this).
Note that since we currently access everything via context id, and not
the data structure this is fine, though not desirable. The next change
attempts to get the context only once via the context ID idr lookup, and
as such, the following can happen:
CTX-A is created, refcount = 1
CTX-A execbuf, mutex dropped
close IOCTL called on CTX-A, refcount = 0
CTX-A resumes in execbuf.
v2: Rebased on top of
commit b6359918b885da7c7b58c050674278dbd06020ab
Author: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Wed Oct 30 15:44:16 2013 +0200
drm/i915: add i915_get_reset_stats_ioctl
v3: Rebased on top of
commit 25b3dfc87bff80317d67ddd2cd4cfb91e6fe7d79
Author: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue Nov 12 11:57:30 2013 +0200
Author: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Tue Nov 26 16:14:33 2013 +0200
drm/i915: check context reset stats before relocations
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 05:11:21 +07:00
|
|
|
i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
|
2013-04-30 17:30:33 +07:00
|
|
|
void i915_gem_context_free(struct kref *ctx_ref);
|
drm/i915/bdw: A bit more advanced LR context alloc/free
Now that we have the ability to allocate our own context backing objects
and we have multiplexed one of them per engine inside the context structs,
we can finally allocate and free them correctly.
Regarding the context size, reading the register to calculate the sizes
can work, I think, however the docs are very clear about the actual
context sizes on GEN8, so just hardcode that and use it.
v2: Rebased on top of the Full PPGTT series. It is important to notice
that at this point we have one global default context per engine, all
of them using the aliasing PPGTT (as opposed to the single global
default context we have with legacy HW contexts).
v3:
- Go back to one single global default context, this time with multiple
backing objects inside.
- Use different context sizes for non-render engines, as suggested by
Damien (still hardcoded, since the information about the context size
registers in the BSpec is, well, *lacking*).
- Render ctx size is 20 (or 19) pages, but not 21 (caught by Damien).
- Move default context backing object creation to intel_init_ring (so
that we don't waste memory in rings that might not get initialized).
v4:
- Reuse the HW legacy context init/fini.
- Create a separate free function.
- Rename the functions with an intel_ prefix.
v5: Several rebases to account for the changes in the previous patches.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-07-24 23:04:14 +07:00
|
|
|
struct drm_i915_gem_object *
|
|
|
|
i915_gem_alloc_context_obj(struct drm_device *dev, size_t size);
|
2014-05-22 20:13:37 +07:00
|
|
|
static inline void i915_gem_context_reference(struct intel_context *ctx)
|
2013-04-30 17:30:33 +07:00
|
|
|
{
|
2014-04-09 15:07:36 +07:00
|
|
|
kref_get(&ctx->ref);
|
2013-04-30 17:30:33 +07:00
|
|
|
}
|
|
|
|
|
2014-05-22 20:13:37 +07:00
|
|
|
static inline void i915_gem_context_unreference(struct intel_context *ctx)
|
2013-04-30 17:30:33 +07:00
|
|
|
{
|
2014-04-09 15:07:36 +07:00
|
|
|
kref_put(&ctx->ref, i915_gem_context_free);
|
2013-04-30 17:30:33 +07:00
|
|
|
}
|
|
|
|
|
2014-05-22 20:13:37 +07:00
|
|
|
/*
 * Return true when @c's user_handle equals DEFAULT_CONTEXT_HANDLE,
 * false otherwise. @c is only read.
 */
static inline bool i915_gem_context_is_default(const struct intel_context *c)
|
2014-01-30 21:05:48 +07:00
|
|
|
{
|
drm/i915: Emphasize that ctx->id is merely a user handle
This is an Execlists preparatory patch, since they make context ID become an
overloaded term:
- In the software, it was used to distinguish which context userspace was
trying to use.
- In the BSpec, the term is used to describe the 20-bits long field the
hardware uses to it to discriminate the contexts that are submitted to
the ELSP and inform the driver about their current status (via Context
Switch Interrupts and Context Status Buffers).
Initially, I tried to make the different meanings converge, but it proved
impossible:
- The software ctx->id is per-filp, while the hardware one needs to be
globally unique.
- Also, we multiplex several backing states objects per intel_context,
and all of them need unique HW IDs.
- I tried adding a per-filp ID and then composing the HW context ID as:
ctx->id + file_priv->id + ring->id, but the fact that the hardware only
uses 20-bits means we have to artificially limit the number of filps or
contexts the userspace can create.
The ctx->user_handle renaming bits are done with this Cocci patch (plus
manual frobbing of the struct declaration):
@@
struct intel_context c;
@@
- (c).id
+ c.user_handle
@@
struct intel_context *c;
@@
- (c)->id
+ c->user_handle
Also, while we are at it, s/DEFAULT_CONTEXT_ID/DEFAULT_CONTEXT_HANDLE and
change the type to unsigned 32 bits.
v2: s/handle/user_handle and change the type to uint32_t as suggested by
Chris Wilson.
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> (v1)
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-07-03 22:28:00 +07:00
|
|
|
return c->user_handle == DEFAULT_CONTEXT_HANDLE;
|
2014-01-30 21:05:48 +07:00
|
|
|
}
|
|
|
|
|
2012-06-05 04:42:54 +07:00
|
|
|
int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file);
|
|
|
|
int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file);
|
2012-05-10 20:25:09 +07:00
|
|
|
|
2013-12-07 05:11:23 +07:00
|
|
|
/* i915_gem_evict.c */
|
|
|
|
int __must_check i915_gem_evict_something(struct drm_device *dev,
|
|
|
|
struct i915_address_space *vm,
|
|
|
|
int min_size,
|
|
|
|
unsigned alignment,
|
|
|
|
unsigned cache_level,
|
drm/i915: Prevent negative relocation deltas from wrapping
This is pure evil. Userspace, I'm looking at you SNA, repacks batch
buffers on the fly after generation as they are being passed to the
kernel for execution. These batches also contain self-referenced
relocations as a single buffer encompasses the state commands, kernels,
vertices and sampler. During generation the buffers are placed at known
offsets within the full batch, and then the relocation deltas (as passed
to the kernel) are tweaked as the batch is repacked into a smaller buffer.
This means that userspace is passing negative relocation deltas, which
subsequently wrap to large values if the batch is at a low address. The
GPU hangs when it then tries to use the large value as a base for its
address offsets, rather than wrapping back to the real value (as one
would hope). As the GPU uses positive offsets from the base, we can
treat the relocation address as the minimum address read by the GPU.
For the upper bound, we trust that userspace will not read beyond the
end of the buffer.
So, how do we fix negative relocations from wrapping? We can either
check that every relocation looks valid when we write it, and then
position each object such that we prevent the offset wraparound, or we
just special-case the self-referential behaviour of SNA and force all
batches to be above 256k. Daniel prefers the latter approach.
This fixes a GPU hang when it tries to use an address (relocation +
offset) greater than the GTT size. The issue would occur quite easily
with full-ppgtt as each fd gets its own VM space, so low offsets would
often be handed out. However, with the rearrangement of the low GTT due
to capturing the BIOS framebuffer, it is already affecting kernels 3.15
onwards. I think only IVB+ is susceptible to this bug, but the workaround
should only kick in rarely, so it seems sensible to always apply it.
v3: Use a bias for batch buffers to prevent small negative delta relocations
from wrapping.
v4 from Daniel:
- s/BIAS/BATCH_OFFSET_BIAS/
- Extract eb_vma_misplaced/i915_vma_misplaced since the conditions
were growing rather cumbersome.
- Add a comment to eb_get_batch explaining why we do this.
- Apply the batch offset bias everywhere but mention that we've only
observed it on gen7 gpus.
- Drop PIN_OFFSET_FIX for now, that slipped in from a feature patch.
v5: Add static to eb_get_batch, spotted by 0-day tester.
Testcase: igt/gem_bad_reloc
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78533
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v3)
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-23 13:48:08 +07:00
|
|
|
unsigned long start,
|
|
|
|
unsigned long end,
|
2014-02-14 20:01:11 +07:00
|
|
|
unsigned flags);
|
2013-12-07 05:11:23 +07:00
|
|
|
int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
|
|
|
|
int i915_gem_evict_everything(struct drm_device *dev);
|
2012-02-09 23:15:46 +07:00
|
|
|
|
drm/i915: Split out GTT specific header file
This file contains all necessary defines, prototypes and typedefs for
manipulating GEN graphics address translation (this does not include the
legacy AGP driver)
Reiterating the comment in the header,
"Please try to maintain the following order within this file unless it
makes sense to do otherwise. From top to bottom:
1. typedefs
2. #defines, and macros
3. structure definitions
4. function prototypes
Within each section, please try to order by generation in ascending
order, from top to bottom (ie. GEN6 on the top, GEN8 on the bottom)."
I've made some minor cleanups, and fixed a couple of typos while here -
but there should be no functional changes.
The purpose of the patch is to reduce clutter in our main header file,
making room for new growth, and make documentation of our interfaces
easier by splitting things out.
With a little more work, like making i915_gtt a pointer, we could
potentially completely isolate this header from i915_drv.h. At the
moment however, I don't think it's worth the effort.
Personally, I would have liked to put the PTE encoding functions in this
file too, but I didn't want to rock the boat too much.
A similar patch has been in use on my machine for some time. This exact
patch though has only been compile tested.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-03-23 12:47:21 +07:00
|
|
|
/* belongs in i915_gem_gtt.h */
|
2012-11-16 03:06:09 +07:00
|
|
|
/*
 * Call intel_gtt_chipset_flush() when the device generation reported by
 * INTEL_INFO(dev)->gen is below 6; for gen 6 and later this does nothing.
 */
static inline void i915_gem_chipset_flush(struct drm_device *dev)
|
2012-11-05 00:21:27 +07:00
|
|
|
{
|
|
|
|
if (INTEL_INFO(dev)->gen < 6)
|
|
|
|
intel_gtt_chipset_flush();
|
|
|
|
}
|
2013-12-07 05:11:14 +07:00
|
|
|
|
2012-04-24 21:47:39 +07:00
|
|
|
/* i915_gem_stolen.c */
|
|
|
|
int i915_gem_init_stolen(struct drm_device *dev);
|
2014-07-01 00:41:24 +07:00
|
|
|
int i915_gem_stolen_setup_compression(struct drm_device *dev, int size, int fb_cpp);
|
2012-11-15 18:32:20 +07:00
|
|
|
void i915_gem_stolen_cleanup_compression(struct drm_device *dev);
|
2012-04-24 21:47:39 +07:00
|
|
|
void i915_gem_cleanup_stolen(struct drm_device *dev);
|
2012-11-15 18:32:26 +07:00
|
|
|
struct drm_i915_gem_object *
|
|
|
|
i915_gem_object_create_stolen(struct drm_device *dev, u32 size);
|
2013-02-20 04:31:37 +07:00
|
|
|
struct drm_i915_gem_object *
|
|
|
|
i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
|
|
|
|
u32 stolen_offset,
|
|
|
|
u32 gtt_offset,
|
|
|
|
u32 size);
|
2012-04-24 21:47:39 +07:00
|
|
|
|
2008-07-31 02:06:12 +07:00
|
|
|
/* i915_gem_tiling.c */
|
2013-08-02 00:39:55 +07:00
|
|
|
/*
 * Return true only when both conditions hold:
 *  - the device's dev_priv->mm.bit_6_swizzle_x is I915_BIT_6_SWIZZLE_9_10_17
 *  - @obj is tiled (its tiling_mode is not I915_TILING_NONE)
 * The device private data is reached through obj->base.dev->dev_private.
 */
static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
|
2012-12-04 04:03:14 +07:00
|
|
|
{
|
2014-03-31 18:27:21 +07:00
|
|
|
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
|
2012-12-04 04:03:14 +07:00
|
|
|
|
|
|
|
return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
|
|
|
|
obj->tiling_mode != I915_TILING_NONE;
|
|
|
|
}
|
|
|
|
|
2008-07-31 02:06:12 +07:00
|
|
|
void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
|
2010-11-09 02:18:58 +07:00
|
|
|
void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj);
|
|
|
|
void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
|
2008-07-31 02:06:12 +07:00
|
|
|
|
|
|
|
/* i915_gem_debug.c */
|
2010-09-29 22:10:57 +07:00
|
|
|
#if WATCH_LISTS
|
|
|
|
int i915_verify_lists(struct drm_device *dev);
|
2008-07-31 02:06:12 +07:00
|
|
|
#else
|
2010-09-29 22:10:57 +07:00
|
|
|
#define i915_verify_lists(dev) 0
|
2008-07-31 02:06:12 +07:00
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2009-02-18 08:08:50 +07:00
|
|
|
/* i915_debugfs.c */
|
2009-07-02 09:26:52 +07:00
|
|
|
int i915_debugfs_init(struct drm_minor *minor);
|
|
|
|
void i915_debugfs_cleanup(struct drm_minor *minor);
|
2013-10-16 16:49:58 +07:00
|
|
|
#ifdef CONFIG_DEBUG_FS
|
2013-10-16 00:55:40 +07:00
|
|
|
void intel_display_crc_init(struct drm_device *dev);
|
|
|
|
#else
|
2013-10-16 16:49:58 +07:00
|
|
|
static inline void intel_display_crc_init(struct drm_device *dev) {}
|
2013-10-16 00:55:40 +07:00
|
|
|
#endif
|
2013-07-12 20:50:57 +07:00
|
|
|
|
|
|
|
/* i915_gpu_error.c */
|
2013-05-23 17:55:35 +07:00
|
|
|
__printf(2, 3)
|
|
|
|
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
|
2013-06-06 19:18:39 +07:00
|
|
|
int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
|
|
|
|
const struct i915_error_state_file_priv *error);
|
2013-06-06 19:18:41 +07:00
|
|
|
int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
|
2014-08-22 20:41:39 +07:00
|
|
|
struct drm_i915_private *i915,
|
2013-06-06 19:18:41 +07:00
|
|
|
size_t count, loff_t pos);
|
|
|
|
/*
 * Free the buffer held in eb->buf with kfree(). The
 * drm_i915_error_state_buf structure itself is not freed here.
 */
static inline void i915_error_state_buf_release(
|
|
|
|
struct drm_i915_error_state_buf *eb)
|
|
|
|
{
|
|
|
|
kfree(eb->buf);
|
|
|
|
}
|
2014-02-25 22:11:26 +07:00
|
|
|
void i915_capture_error_state(struct drm_device *dev, bool wedge,
|
|
|
|
const char *error_msg);
|
2013-07-12 20:50:57 +07:00
|
|
|
void i915_error_state_get(struct drm_device *dev,
|
|
|
|
struct i915_error_state_file_priv *error_priv);
|
|
|
|
void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
|
|
|
|
void i915_destroy_error_state(struct drm_device *dev);
|
|
|
|
|
|
|
|
void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
|
2014-08-22 20:41:39 +07:00
|
|
|
const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
|
2009-02-18 08:08:50 +07:00
|
|
|
|
2014-02-19 01:15:46 +07:00
|
|
|
/* i915_cmd_parser.c */
|
2014-02-19 01:15:56 +07:00
|
|
|
int i915_cmd_parser_get_version(void);
|
2014-05-22 20:13:33 +07:00
|
|
|
int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
|
|
|
|
void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring);
|
|
|
|
bool i915_needs_cmd_parser(struct intel_engine_cs *ring);
|
|
|
|
int i915_parse_cmds(struct intel_engine_cs *ring,
|
2014-02-19 01:15:46 +07:00
|
|
|
struct drm_i915_gem_object *batch_obj,
|
|
|
|
u32 batch_start_offset,
|
|
|
|
bool is_master);
|
|
|
|
|
2008-08-26 05:11:06 +07:00
|
|
|
/* i915_suspend.c */
|
|
|
|
extern int i915_save_state(struct drm_device *dev);
|
|
|
|
extern int i915_restore_state(struct drm_device *dev);
|
2008-10-01 02:14:26 +07:00
|
|
|
|
2013-01-25 23:53:20 +07:00
|
|
|
/* i915_ums.c */
|
|
|
|
void i915_save_display_reg(struct drm_device *dev);
|
|
|
|
void i915_restore_display_reg(struct drm_device *dev);
|
2008-08-26 05:11:06 +07:00
|
|
|
|
2012-04-11 11:17:01 +07:00
|
|
|
/* i915_sysfs.c */
|
|
|
|
void i915_setup_sysfs(struct drm_device *dev_priv);
|
|
|
|
void i915_teardown_sysfs(struct drm_device *dev_priv);
|
|
|
|
|
2010-07-21 05:44:45 +07:00
|
|
|
/* intel_i2c.c */
|
|
|
|
extern int intel_setup_gmbus(struct drm_device *dev);
|
|
|
|
extern void intel_teardown_gmbus(struct drm_device *dev);
|
2013-05-06 19:52:08 +07:00
|
|
|
/*
 * Return true when @port lies in the inclusive range
 * [GMBUS_PORT_SSC, GMBUS_PORT_DPD], false otherwise.
 */
static inline bool intel_gmbus_is_port_valid(unsigned port)
|
2012-03-28 01:36:14 +07:00
|
|
|
{
|
2012-03-28 01:36:15 +07:00
|
|
|
return (port >= GMBUS_PORT_SSC && port <= GMBUS_PORT_DPD);
|
2012-03-28 01:36:14 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
extern struct i2c_adapter *intel_gmbus_get_adapter(
|
|
|
|
struct drm_i915_private *dev_priv, unsigned port);
|
2010-09-24 18:52:03 +07:00
|
|
|
extern void intel_gmbus_set_speed(struct i2c_adapter *adapter, int speed);
|
|
|
|
extern void intel_gmbus_force_bit(struct i2c_adapter *adapter, bool force_bit);
|
2013-05-06 19:52:08 +07:00
|
|
|
/*
 * Return the force_bit field of the struct intel_gmbus that contains
 * @adapter. The enclosing structure is recovered with container_of(), so
 * @adapter must be the ->adapter member of a struct intel_gmbus.
 */
static inline bool intel_gmbus_is_forced_bit(struct i2c_adapter *adapter)
|
2010-09-28 22:41:32 +07:00
|
|
|
{
|
|
|
|
return container_of(adapter, struct intel_gmbus, adapter)->force_bit;
|
|
|
|
}
|
2010-07-21 05:44:45 +07:00
|
|
|
extern void intel_i2c_reset(struct drm_device *dev);
|
|
|
|
|
2010-08-24 15:02:58 +07:00
|
|
|
/* intel_opregion.c */
|
2010-08-19 22:09:23 +07:00
|
|
|
#ifdef CONFIG_ACPI
|
2013-12-06 15:52:05 +07:00
|
|
|
extern int intel_opregion_setup(struct drm_device *dev);
|
2010-08-19 22:09:23 +07:00
|
|
|
extern void intel_opregion_init(struct drm_device *dev);
|
|
|
|
extern void intel_opregion_fini(struct drm_device *dev);
|
2010-08-24 15:02:58 +07:00
|
|
|
extern void intel_opregion_asle_intr(struct drm_device *dev);
|
2013-08-30 23:40:30 +07:00
|
|
|
extern int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder,
|
|
|
|
bool enable);
|
2013-08-30 23:40:31 +07:00
|
|
|
extern int intel_opregion_notify_adapter(struct drm_device *dev,
|
|
|
|
pci_power_t state);
|
2008-10-25 04:18:10 +07:00
|
|
|
#else
|
2013-12-06 15:52:05 +07:00
|
|
|
/*
 * Stub versions of the intel_opregion_* entry points for the #else branch
 * of the CONFIG_ACPI conditional: the int-returning stubs return 0 and the
 * void stubs do nothing.
 */
static inline int intel_opregion_setup(struct drm_device *dev) { return 0; }
|
2010-08-19 22:09:23 +07:00
|
|
|
static inline void intel_opregion_init(struct drm_device *dev) { return; }
|
|
|
|
static inline void intel_opregion_fini(struct drm_device *dev) { return; }
|
2010-08-24 15:02:58 +07:00
|
|
|
static inline void intel_opregion_asle_intr(struct drm_device *dev) { return; }
|
2013-08-30 23:40:30 +07:00
|
|
|
static inline int
|
|
|
|
intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, bool enable)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
2013-08-30 23:40:31 +07:00
|
|
|
static inline int
|
|
|
|
intel_opregion_notify_adapter(struct drm_device *dev, pci_power_t state)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
2008-10-25 04:18:10 +07:00
|
|
|
#endif
|
2008-08-06 01:37:25 +07:00
|
|
|
|
2010-10-08 06:01:13 +07:00
|
|
|
/* intel_acpi.c */
|
|
|
|
#ifdef CONFIG_ACPI
|
|
|
|
extern void intel_register_dsm_handler(void);
|
|
|
|
extern void intel_unregister_dsm_handler(void);
|
|
|
|
#else
|
|
|
|
/* Empty stubs for the #else branch of the CONFIG_ACPI conditional; both do nothing. */
static inline void intel_register_dsm_handler(void) { return; }
|
|
|
|
static inline void intel_unregister_dsm_handler(void) { return; }
|
|
|
|
#endif /* CONFIG_ACPI */
|
|
|
|
|
DRM: i915: add mode setting support
This commit adds i915 driver support for the DRM mode setting APIs.
Currently, VGA, LVDS, SDVO DVI & VGA, TV and DVO LVDS outputs are
supported. HDMI, DisplayPort and additional SDVO output support will
follow.
Support for the mode setting code is controlled by the new 'modeset'
module option. A new config option, CONFIG_DRM_I915_KMS controls the
default behavior, and whether a PCI ID list is built into the module for
use by user level module utilities.
Note that if mode setting is enabled, user level drivers that access
display registers directly or that don't use the kernel graphics memory
manager will likely corrupt kernel graphics memory, disrupt output
configuration (possibly leading to hangs and/or blank displays), and
prevent panic/oops messages from appearing. So use caution when
enabling this code; be sure your user level code supports the new
interfaces.
A new SysRq key, 'g', provides emergency support for switching back to
the kernel's framebuffer console; which is useful for testing.
Co-authors: Dave Airlie <airlied@linux.ie>, Hong Liu <hong.liu@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2008-11-08 05:24:08 +07:00
|
|
|
/* modesetting */
|
2012-04-10 20:50:11 +07:00
|
|
|
extern void intel_modeset_init_hw(struct drm_device *dev);
|
DRM: i915: add mode setting support
This commit adds i915 driver support for the DRM mode setting APIs.
Currently, VGA, LVDS, SDVO DVI & VGA, TV and DVO LVDS outputs are
supported. HDMI, DisplayPort and additional SDVO output support will
follow.
Support for the mode setting code is controlled by the new 'modeset'
module option. A new config option, CONFIG_DRM_I915_KMS controls the
default behavior, and whether a PCI ID list is built into the module for
use by user level module utilities.
Note that if mode setting is enabled, user level drivers that access
display registers directly or that don't use the kernel graphics memory
manager will likely corrupt kernel graphics memory, disrupt output
configuration (possibly leading to hangs and/or blank displays), and
prevent panic/oops messages from appearing. So use caution when
enabling this code; be sure your user level code supports the new
interfaces.
A new SysRq key, 'g', provides emergency support for switching back to
the kernel's framebuffer console; which is useful for testing.
Co-authors: Dave Airlie <airlied@linux.ie>, Hong Liu <hong.liu@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2008-11-08 05:24:08 +07:00
|
|
|
extern void intel_modeset_init(struct drm_device *dev);
|
2011-03-29 16:40:27 +07:00
|
|
|
extern void intel_modeset_gem_init(struct drm_device *dev);
|
DRM: i915: add mode setting support
This commit adds i915 driver support for the DRM mode setting APIs.
Currently, VGA, LVDS, SDVO DVI & VGA, TV and DVO LVDS outputs are
supported. HDMI, DisplayPort and additional SDVO output support will
follow.
Support for the mode setting code is controlled by the new 'modeset'
module option. A new config option, CONFIG_DRM_I915_KMS controls the
default behavior, and whether a PCI ID list is built into the module for
use by user level module utilities.
Note that if mode setting is enabled, user level drivers that access
display registers directly or that don't use the kernel graphics memory
manager will likely corrupt kernel graphics memory, disrupt output
configuration (possibly leading to hangs and/or blank displays), and
prevent panic/oops messages from appearing. So use caution when
enabling this code; be sure your user level code supports the new
interfaces.
A new SysRq key, 'g', provides emergency support for switching back to
the kernel's framebuffer console; which is useful for testing.
Co-authors: Dave Airlie <airlied@linux.ie>, Hong Liu <hong.liu@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2008-11-08 05:24:08 +07:00
|
|
|
extern void intel_modeset_cleanup(struct drm_device *dev);
|
2014-02-11 22:12:48 +07:00
|
|
|
extern void intel_connector_unregister(struct intel_connector *);
|
2009-09-21 11:33:58 +07:00
|
|
|
extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state);
|
2012-11-24 00:16:34 +07:00
|
|
|
extern void intel_modeset_setup_hw_state(struct drm_device *dev,
|
|
|
|
bool force_restore);
|
2013-01-25 23:53:21 +07:00
|
|
|
extern void i915_redisable_vga(struct drm_device *dev);
|
2014-02-18 05:02:16 +07:00
|
|
|
extern void i915_redisable_vga_power_on(struct drm_device *dev);
|
2010-04-23 22:17:39 +07:00
|
|
|
extern bool intel_fbc_enabled(struct drm_device *dev);
|
2014-09-25 06:50:59 +07:00
|
|
|
extern void bdw_fbc_sw_flush(struct drm_device *dev, u32 value);
|
2011-07-08 18:22:36 +07:00
|
|
|
extern void intel_disable_fbc(struct drm_device *dev);
|
2010-05-21 04:28:11 +07:00
|
|
|
extern bool ironlake_set_drps(struct drm_device *dev, u8 val);
|
2012-12-01 21:04:25 +07:00
|
|
|
extern void intel_init_pch_refclk(struct drm_device *dev);
|
2010-12-18 05:19:02 +07:00
|
|
|
extern void gen6_set_rps(struct drm_device *dev, u8 val);
|
2013-04-18 05:54:58 +07:00
|
|
|
extern void valleyview_set_rps(struct drm_device *dev, u8 val);
|
2014-07-01 16:36:17 +07:00
|
|
|
extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
|
|
|
|
bool enable);
|
2011-08-17 02:34:10 +07:00
|
|
|
extern void intel_detect_pch(struct drm_device *dev);
|
|
|
|
extern int intel_trans_dp_port_sel(struct drm_crtc *crtc);
|
2012-04-11 11:17:01 +07:00
|
|
|
extern int intel_enable_rc6(const struct drm_device *dev);
|
2010-04-07 15:15:53 +07:00
|
|
|
|
2012-04-06 04:47:36 +07:00
|
|
|
extern bool i915_semaphore_is_enabled(struct drm_device *dev);
|
2012-07-13 01:01:05 +07:00
|
|
|
int i915_reg_read_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file);
|
2013-10-30 20:44:16 +07:00
|
|
|
int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file);
|
2012-03-29 03:39:37 +07:00
|
|
|
|
drm/i915: Replaced Blitter ring based flips with MMIO flips
This patch enables the framework for using MMIO based flip calls,
in contrast with the CS based flip calls which are being used currently.
MMIO based flip calls can be enabled on architectures where
Render and Blitter engines reside in different power wells. The
decision to use MMIO flips can be made based on workloads to give
100% residency for Media power well.
v2: The MMIO flips now use the interrupt driven mechanism for issuing the
flips when target seqno is reached. (Incorporating Ville's idea)
v3: Rebasing on latest code. Code restructuring after incorporating
Damien's comments
v4: Addressing Ville's review comments
-general cleanup
-updating only base addr instead of calling update_primary_plane
-extending patch for gen5+ platforms
v5: Addressed Ville's review comments
-Making mmio flip vs cs flip selection based on module parameter
-Adding check for DRIVER_MODESET feature in notify_ring before calling
notify mmio flip.
-Other changes mostly in function arguments
v6: -Having a separate function to check condition for using mmio flips (Ville)
-propagating error code from i915_gem_check_olr (Ville)
v7: -Adding __must_check with i915_gem_check_olr (Chris)
-Renaming mmio_flip_data to mmio_flip (Chris)
-Rebasing on latest nightly
v8: -Rebasing on latest code
-squash 3rd patch in series(mmio setbase vs page flip race) with this patch
-Added new tiling mode update in intel_do_mmio_flip (Chris)
v9: -check for obj->last_write_seqno being 0 instead of obj->ring being NULL in
intel_postpone_flip, as this is a more restrictive condition (Chris)
v10: -Applied Chris's suggestions for squashing patches 2,3 into this patch.
These patches make the selection of CS vs MMIO flip at the page flip time, and
make the module parameter for using mmio flips as tristate, the states being
'force CS flips', 'force mmio flips', 'driver discretion'.
Changed the logic for driver discretion (Chris)
v11: Minor code cleanup(better readability, fixing whitespace errors, using
lockdep to check mutex locked status in postpone_flip, removal of __must_check
in function definition) (Chris)
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Tested-by: Chris Wilson <chris@chris-wilson.co.uk> # snb, ivb
[danvet: Fix up parameter alignment checkpatch spotted.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-06-02 18:17:17 +07:00
|
|
|
void intel_notify_mmio_flip(struct intel_engine_cs *ring);
|
|
|
|
|
2010-08-05 02:26:07 +07:00
|
|
|
/* overlay */
|
|
|
|
extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
|
2013-05-23 17:55:35 +07:00
|
|
|
extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
|
|
|
|
struct intel_overlay_error_state *error);
|
2010-11-21 20:12:35 +07:00
|
|
|
|
|
|
|
extern struct intel_display_error_state *intel_display_capture_error_state(struct drm_device *dev);
|
2013-05-23 17:55:35 +07:00
|
|
|
extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
|
2010-11-21 20:12:35 +07:00
|
|
|
struct drm_device *dev,
|
|
|
|
struct intel_display_error_state *error);
|
2010-08-05 02:26:07 +07:00
|
|
|
|
2011-04-26 01:22:22 +07:00
|
|
|
/* On SNB platform, before reading ring registers forcewake bit
|
|
|
|
* must be set to prevent GT core from power down and stale values being
|
|
|
|
* returned.
|
|
|
|
*/
|
2013-11-23 16:25:42 +07:00
|
|
|
void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine);
|
|
|
|
void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine);
|
2014-02-21 23:52:26 +07:00
|
|
|
void assert_force_wake_inactive(struct drm_i915_private *dev_priv);
|
2011-04-26 01:22:22 +07:00
|
|
|
|
2014-11-14 09:50:10 +07:00
|
|
|
int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val);
|
|
|
|
int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val);
|
2013-05-22 19:36:16 +07:00
|
|
|
|
|
|
|
/* intel_sideband.c */
|
2013-05-22 19:36:20 +07:00
|
|
|
u32 vlv_punit_read(struct drm_i915_private *dev_priv, u8 addr);
|
|
|
|
void vlv_punit_write(struct drm_i915_private *dev_priv, u8 addr, u32 val);
|
|
|
|
u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr);
|
2013-08-27 19:12:14 +07:00
|
|
|
u32 vlv_gpio_nc_read(struct drm_i915_private *dev_priv, u32 reg);
|
|
|
|
void vlv_gpio_nc_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
|
|
|
|
u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg);
|
|
|
|
void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
|
|
|
|
u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg);
|
|
|
|
void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
|
2013-11-05 02:52:44 +07:00
|
|
|
u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg);
|
|
|
|
void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
|
2013-08-27 19:12:14 +07:00
|
|
|
u32 vlv_gps_core_read(struct drm_i915_private *dev_priv, u32 reg);
|
|
|
|
void vlv_gps_core_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
|
2013-09-05 19:41:49 +07:00
|
|
|
u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg);
|
|
|
|
void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val);
|
2013-05-22 19:36:16 +07:00
|
|
|
u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
|
|
|
|
enum intel_sbi_destination destination);
|
|
|
|
void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
|
|
|
|
enum intel_sbi_destination destination);
|
2013-12-10 13:44:55 +07:00
|
|
|
u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg);
|
|
|
|
void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
|
2013-04-18 05:54:58 +07:00
|
|
|
|
2013-11-06 03:42:29 +07:00
|
|
|
int vlv_gpu_freq(struct drm_i915_private *dev_priv, int val);
|
|
|
|
int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val);
|
2012-09-27 00:34:00 +07:00
|
|
|
|
2013-11-23 16:25:42 +07:00
|
|
|
/*
 * Single-bit flags (bits 0..2); FORCEWAKE_ALL is the bitwise OR of all three.
 * NOTE(review): presumably these are the values passed as fw_engine to
 * gen6_gt_force_wake_get()/gen6_gt_force_wake_put() -- confirm against callers.
 */
#define FORCEWAKE_RENDER (1 << 0)
|
|
|
|
#define FORCEWAKE_MEDIA (1 << 1)
|
2014-11-05 00:07:04 +07:00
|
|
|
#define FORCEWAKE_BLITTER (1 << 2)
|
|
|
|
#define FORCEWAKE_ALL (FORCEWAKE_RENDER | FORCEWAKE_MEDIA | \
|
|
|
|
FORCEWAKE_BLITTER)
|
2013-11-23 16:25:42 +07:00
|
|
|
|
|
|
|
|
2013-10-05 11:22:51 +07:00
|
|
|
/*
 * 8-, 16- and 32-bit MMIO accessor macros. Each expands to a call through
 * the dev_priv->uncore.funcs function-pointer table, so a variable named
 * dev_priv must be in scope wherever they are used.
 * The last argument is true for the plain variants and false for the
 * _NOTRACE variants.
 * NOTE(review): presumably that flag enables/disables tracing of the
 * access -- confirm in the uncore implementation.
 */
#define I915_READ8(reg) dev_priv->uncore.funcs.mmio_readb(dev_priv, (reg), true)
|
|
|
|
#define I915_WRITE8(reg, val) dev_priv->uncore.funcs.mmio_writeb(dev_priv, (reg), (val), true)
|
|
|
|
|
|
|
|
#define I915_READ16(reg) dev_priv->uncore.funcs.mmio_readw(dev_priv, (reg), true)
|
|
|
|
#define I915_WRITE16(reg, val) dev_priv->uncore.funcs.mmio_writew(dev_priv, (reg), (val), true)
|
|
|
|
#define I915_READ16_NOTRACE(reg) dev_priv->uncore.funcs.mmio_readw(dev_priv, (reg), false)
|
|
|
|
#define I915_WRITE16_NOTRACE(reg, val) dev_priv->uncore.funcs.mmio_writew(dev_priv, (reg), (val), false)
|
|
|
|
|
|
|
|
#define I915_READ(reg) dev_priv->uncore.funcs.mmio_readl(dev_priv, (reg), true)
|
|
|
|
#define I915_WRITE(reg, val) dev_priv->uncore.funcs.mmio_writel(dev_priv, (reg), (val), true)
|
|
|
|
#define I915_READ_NOTRACE(reg) dev_priv->uncore.funcs.mmio_readl(dev_priv, (reg), false)
|
|
|
|
#define I915_WRITE_NOTRACE(reg, val) dev_priv->uncore.funcs.mmio_writel(dev_priv, (reg), (val), false)
|
|
|
|
|
2014-03-21 20:16:43 +07:00
|
|
|
/* Be very careful with read/write 64-bit values. On 32-bit machines, they
|
|
|
|
* will be implemented using 2 32-bit writes in an arbitrary order with
|
|
|
|
* an arbitrary delay between them. This can cause the hardware to
|
|
|
|
* act upon the intermediate value, possibly leading to corruption and
|
|
|
|
* machine death. You have been warned.
|
|
|
|
*/
|
2013-10-05 11:22:51 +07:00
|
|
|
/*
 * 64-bit accessors: expand to dev_priv->uncore.funcs.mmio_writeq/mmio_readq,
 * so a variable named dev_priv must be in scope at the use site.
 */
#define I915_WRITE64(reg, val) dev_priv->uncore.funcs.mmio_writeq(dev_priv, (reg), (val), true)
|
|
|
|
#define I915_READ64(reg) dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true)
|
2010-11-09 16:17:32 +07:00
|
|
|
|
2014-03-21 19:41:53 +07:00
|
|
|
/*
 * Build a 64-bit value from two 32-bit registers using I915_READ().
 *
 * Sequence: read upper, read lower, read upper again. If the two upper
 * reads differ, the second upper value is kept, lower is read again, and
 * WARN_ON() fires if a further read of upper differs once more.
 * The result of the statement expression is (u64)upper << 32 | lower.
 *
 * Notes for users:
 *  - lower_reg and upper_reg are each expanded several times.
 *  - the macro declares locals named upper, lower and tmp.
 *  - like I915_READ(), it needs dev_priv in scope.
 */
#define I915_READ64_2x32(lower_reg, upper_reg) ({ \
|
|
|
|
u32 upper = I915_READ(upper_reg); \
|
|
|
|
u32 lower = I915_READ(lower_reg); \
|
|
|
|
u32 tmp = I915_READ(upper_reg); \
|
|
|
|
if (upper != tmp) { \
|
|
|
|
upper = tmp; \
|
|
|
|
lower = I915_READ(lower_reg); \
|
|
|
|
WARN_ON(I915_READ(upper_reg) != upper); \
|
|
|
|
} \
|
|
|
|
(u64)upper << 32 | lower; })
|
|
|
|
|
2010-11-09 16:17:32 +07:00
|
|
|
/*
 * Read @reg with the _NOTRACE accessor and discard the value (cast to void).
 * Needs dev_priv in scope, as the underlying accessors do.
 */
#define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg)
|
|
|
|
#define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg)
|
|
|
|
|
2013-01-17 21:31:29 +07:00
|
|
|
/* "Broadcast RGB" property */
|
|
|
|
#define INTEL_BROADCAST_RGB_AUTO 0
|
|
|
|
#define INTEL_BROADCAST_RGB_FULL 1
|
|
|
|
#define INTEL_BROADCAST_RGB_LIMITED 2
|
2010-11-08 16:09:41 +07:00
|
|
|
|
2013-01-26 02:44:46 +07:00
|
|
|
/*
 * Select the VGA control register for @dev:
 *  - VLV_VGACNTRL when IS_VALLEYVIEW(dev) is true,
 *  - CPU_VGACNTRL when the generation is 5 or later,
 *  - VGACNTRL otherwise.
 * The Valleyview check is made first, so it takes precedence over the
 * generation check.
 */
static inline uint32_t i915_vgacntrl_reg(struct drm_device *dev)
|
|
|
|
{
|
2014-07-21 16:53:40 +07:00
|
|
|
if (IS_VALLEYVIEW(dev))
|
2013-01-26 02:44:46 +07:00
|
|
|
return VLV_VGACNTRL;
|
2014-07-21 16:53:40 +07:00
|
|
|
else if (INTEL_INFO(dev)->gen >= 5)
|
|
|
|
return CPU_VGACNTRL;
|
2013-01-26 02:44:46 +07:00
|
|
|
else
|
|
|
|
return VGACNTRL;
|
|
|
|
}
|
|
|
|
|
2013-02-22 21:12:51 +07:00
|
|
|
/*
 * Convert a 64-bit integer @address into a __user pointer. The value is
 * cast through uintptr_t first, so on builds where uintptr_t is narrower
 * than 64 bits the upper bits of @address are dropped by that cast.
 */
static inline void __user *to_user_ptr(u64 address)
|
|
|
|
{
|
|
|
|
return (void __user *)(uintptr_t)address;
|
|
|
|
}
|
|
|
|
|
2013-05-22 00:03:17 +07:00
|
|
|
/*
 * Convert @m milliseconds to jiffies with msecs_to_jiffies(), add one, and
 * clamp the result to at most MAX_JIFFY_OFFSET.
 * NOTE(review): the extra jiffy presumably guarantees that at least the
 * requested time elapses when used as a timeout -- confirm intent.
 */
static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m)
|
|
|
|
{
|
|
|
|
unsigned long j = msecs_to_jiffies(m);
|
|
|
|
|
|
|
|
return min_t(unsigned long, MAX_JIFFY_OFFSET, j + 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Convert the timespec pointed to by @value to jiffies with
 * timespec_to_jiffies(), add one, and clamp the result to at most
 * MAX_JIFFY_OFFSET.
 * NOTE(review): the extra jiffy presumably guarantees that at least the
 * requested time elapses when used as a timeout -- confirm intent.
 */
static inline unsigned long
|
|
|
|
timespec_to_jiffies_timeout(const struct timespec *value)
|
|
|
|
{
|
|
|
|
unsigned long j = timespec_to_jiffies(value);
|
|
|
|
|
|
|
|
return min_t(unsigned long, MAX_JIFFY_OFFSET, j + 1);
|
|
|
|
}
|
|
|
|
|
2013-12-19 23:29:40 +07:00
|
|
|
/*
|
|
|
|
* If you need to wait X milliseconds between events A and B, but event B
|
|
|
|
* doesn't happen exactly after event A, you record the timestamp (jiffies) of
|
|
|
|
* when event A happened, then just before event B you call this function and
|
|
|
|
* pass the timestamp as the first argument, and X as the second argument.
|
|
|
|
*/
|
|
|
|
static inline void
|
|
|
|
wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
|
|
|
|
{
|
2014-01-29 18:25:40 +07:00
|
|
|
unsigned long target_jiffies, tmp_jiffies, remaining_jiffies;
|
2013-12-19 23:29:40 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Don't re-read the value of "jiffies" every time since it may change
|
|
|
|
* behind our back and break the math.
|
|
|
|
*/
|
|
|
|
tmp_jiffies = jiffies;
|
|
|
|
/* Deadline = caller's timestamp + to_wait_ms converted by msecs_to_jiffies_timeout(). */
target_jiffies = timestamp_jiffies +
|
|
|
|
msecs_to_jiffies_timeout(to_wait_ms);
|
|
|
|
|
|
|
|
/* Sleep only if the deadline is still ahead of the sampled jiffies value. */
if (time_after(target_jiffies, tmp_jiffies)) {
|
2014-01-29 18:25:40 +07:00
|
|
|
remaining_jiffies = target_jiffies - tmp_jiffies;
|
|
|
|
/* Keep sleeping until schedule_timeout_uninterruptible() returns 0. */
while (remaining_jiffies)
|
|
|
|
remaining_jiffies =
|
|
|
|
schedule_timeout_uninterruptible(remaining_jiffies);
|
2013-12-19 23:29:40 +07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#endif
|