2005-04-17 05:20:36 +07:00
|
|
|
/* i915_drv.h -- Private header for the I915 driver -*- linux-c -*-
|
|
|
|
*/
|
2006-01-02 16:14:23 +07:00
|
|
|
/*
|
2005-06-23 19:46:46 +07:00
|
|
|
*
|
2005-04-17 05:20:36 +07:00
|
|
|
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
|
|
|
|
* All Rights Reserved.
|
2005-06-23 19:46:46 +07:00
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the
|
|
|
|
* "Software"), to deal in the Software without restriction, including
|
|
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
|
|
* the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the
|
|
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
|
|
* of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
|
|
|
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
|
|
|
|
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
*
|
2006-01-02 16:14:23 +07:00
|
|
|
*/
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#ifndef _I915_DRV_H_
|
|
|
|
#define _I915_DRV_H_
|
|
|
|
|
2012-12-04 04:03:14 +07:00
|
|
|
#include <uapi/drm/i915_drm.h>
|
2015-02-11 00:16:05 +07:00
|
|
|
#include <uapi/drm/drm_fourcc.h>
|
2012-12-04 04:03:14 +07:00
|
|
|
|
2008-10-31 09:38:48 +07:00
|
|
|
#include <linux/io-mapping.h>
|
2010-07-21 05:44:45 +07:00
|
|
|
#include <linux/i2c.h>
|
2012-02-28 06:43:09 +07:00
|
|
|
#include <linux/i2c-algo-bit.h>
|
2011-08-12 17:11:33 +07:00
|
|
|
#include <linux/backlight.h>
|
2017-06-16 21:05:16 +07:00
|
|
|
#include <linux/hash.h>
|
2012-04-06 04:47:36 +07:00
|
|
|
#include <linux/intel-iommu.h>
|
2012-04-27 20:17:39 +07:00
|
|
|
#include <linux/kref.h>
|
2018-06-07 04:45:20 +07:00
|
|
|
#include <linux/mm_types.h>
|
drm/i915/pmu: Expose a PMU interface for perf queries
From: Chris Wilson <chris@chris-wilson.co.uk>
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
From: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
The first goal is to be able to measure GPU (and individual ring) busyness
without having to poll registers from userspace. (Which not only incurs
holding the forcewake lock indefinitely, perturbing the system, but also
runs the risk of hanging the machine.) As an alternative we can use the
perf event counter interface to sample the ring registers periodically
and send those results to userspace.
Functionality we are exporting to userspace is via the existing perf PMU
API and can be exercised via the existing tools. For example:
perf stat -a -e i915/rcs0-busy/ -I 1000
Will print the render engine busyness once per second. All the performance
counters can be enumerated (perf list) and have their unit of measure
correctly reported in sysfs.
v1-v2 (Chris Wilson):
v2: Use a common timer for the ring sampling.
v3: (Tvrtko Ursulin)
* Decouple uAPI from i915 engine ids.
* Complete uAPI defines.
* Refactor some code to helpers for clarity.
* Skip sampling disabled engines.
* Expose counters in sysfs.
* Pass in fake regs to avoid null ptr deref in perf core.
* Convert to class/instance uAPI.
* Use shared driver code for rc6 residency, power and frequency.
v4: (Dmitry Rogozhkin)
* Register PMU with .task_ctx_nr=perf_invalid_context
* Expose cpumask for the PMU with the single CPU in the mask
* Properly support pmu->stop(): it should call pmu->read()
* Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE)
* Introduce refcounting of event subscriptions.
* Make pmu.busy_stats a refcounter to avoid busy stats going away
with some deleted event.
* Expose cpumask for i915 PMU to avoid multiple events creation of
the same type followed by counter aggregation by perf-stat.
* Track CPUs getting online/offline to migrate perf context. If (likely)
cpumask will initially set CPU0, CONFIG_BOOTPARAM_HOTPLUG_CPU0 will be
needed to see effect of CPU status tracking.
* End result is that only global events are supported and perf stat
works correctly.
* Deny perf driver level sampling - it is prohibited for uncore PMU.
v5: (Tvrtko Ursulin)
* Don't hardcode number of engine samplers.
* Rewrite event ref-counting for correctness and simplicity.
* Store initial counter value when starting already enabled events
to correctly report values to all listeners.
* Fix RC6 residency readout.
* Comments, GPL header.
v6:
* Add missing entry to v4 changelog.
* Fix accounting in CPU hotplug case by copying the approach from
arch/x86/events/intel/cstate.c. (Dmitry Rogozhkin)
v7:
* Log failure message only on failure.
* Remove CPU hotplug notification state on unregister.
v8:
* Fix error unwind on failed registration.
* Checkpatch cleanup.
v9:
* Drop the energy metric, it is available via intel_rapl_perf.
(Ville Syrjälä)
* Use HAS_RC6(p). (Chris Wilson)
* Handle unsupported non-engine events. (Dmitry Rogozhkin)
* Rebase for intel_rc6_residency_ns needing caller managed
runtime pm.
* Drop HAS_RC6 checks from the read callback since creating those
events will be rejected at init time already.
* Add counter units to sysfs so perf stat output is nicer.
* Cleanup the attribute tables for brevity and readability.
v10:
* Fixed queued accounting.
v11:
* Move intel_engine_lookup_user to intel_engine_cs.c
* Commit update. (Joonas Lahtinen)
v12:
* More accurate sampling. (Chris Wilson)
* Store and report frequency in MHz for better usability from
perf stat.
* Removed metrics: queued, interrupts, rc6 counters.
* Sample engine busyness based on seqno difference only
for less MMIO (and forcewake) on all platforms. (Chris Wilson)
v13:
* Comment spelling, use mul_u32_u32 to work around potential GCC
issue and some code alignment changes. (Chris Wilson)
v14:
* Rebase.
v15:
* Rebase for RPS refactoring.
v16:
* Use the dynamic slot in the CPU hotplug state machine so that we are
free to setup our state as multi-instance. Previously we were re-using
the CPUHP_AP_PERF_X86_UNCORE_ONLINE slot which is neither used as
multi-instance, nor owned by our driver to start with.
* Register the CPU hotplug handlers after the PMU, otherwise the callback
will get called before the PMU is initialized which can end up in
perf_pmu_migrate_context with an un-initialized base.
* Added workaround for a probable bug in cpuhp core.
v17:
* Remove workaround for the cpuhp bug.
v18:
* Rebase for drm_i915_gem_engine_class getting upstream before us.
v19:
* Rebase. (trivial)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171121181852.16128-2-tvrtko.ursulin@linux.intel.com
2017-11-22 01:18:45 +07:00
|
|
|
#include <linux/perf_event.h>
|
drm/i915: irq-drive the dp aux communication
At least on the platforms that have a dp aux irq and also have it
enabled - vlv/hsw should have one, too. But I don't have a machine to
test this on. Judging from docs there's no dp aux interrupt for gm45.
Also, I only have an ivb cpu edp machine, so the dp aux A code for
snb/ilk is untested.
For dpcd probing when nothing is connected it slashes about 5ms of cpu
time (cpu time is now negligible), which agrees with 3 * 5 400 usec
timeouts.
A previous version of this patch increases the time required to go
through the dp_detect cycle (which includes reading the edid) from
around 33 ms to around 40 ms. Experiments indicated that this is
purely due to the irq latency - the hw doesn't allow us to queue up
dp aux transactions and hence irq latency directly affects throughput.
gmbus is much better, there we have a 8 byte buffer, and we get the
irq once another 4 bytes can be queued up.
But by using the pm_qos interface to request the lowest possible cpu
wake-up latency this slowdown completely disappeared.
Since all our output detection logic is single-threaded with the
mode_config mutex right now anyway, I've decided not to play fancy and
to just reuse the gmbus wait queue. But this would definitely prep the
way to run dp detection on different ports in parallel
v2: Add a timeout for dp aux transfers when using interrupts - the hw
_does_ prevent this with the hw-based 400 usec timeout, but if the
irq somehow doesn't arrive we're screwed. Lesson learned while
developing this ;-)
v3: While at it also convert the busy-loop to wait_for_atomic, so that
we don't run the risk of an infinite loop any more.
v4: Ensure we have the smallest possible irq latency by using the
pm_qos interface.
v5: Add a comment to the code to explain why we frob pm_qos. Suggested
by Chris Wilson.
v6: Disable dp irq for vlv, that's easier than trying to get at docs
and hw.
v7: Squash in a fix for Haswell that Paulo Zanoni tracked down - the
dp aux registers aren't at a fixed offset any more, but can be on the
PCH while the DP port is on the cpu die.
Reviewed-by: Imre Deak <imre.deak@intel.com> (v6)
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-12-01 19:53:48 +07:00
|
|
|
#include <linux/pm_qos.h>
|
drm/i915: Move GEM activity tracking into a common struct reservation_object
In preparation to support many distinct timelines, we need to expand the
activity tracking on the GEM object to handle more than just a request
per engine. We already use the struct reservation_object on the dma-buf
to handle many fence contexts, so integrating that into the GEM object
itself is the preferred solution. (For example, we can now share the same
reservation_object between every consumer/producer using this buffer and
skip the manual import/export via dma-buf.)
v2: Reimplement busy-ioctl (by walking the reservation object), postpone
the ABI change for another day. Similarly use the reservation object to
find the last_write request (if active and from i915) for choosing
display CS flips.
Caveats:
* busy-ioctl: busy-ioctl only reports on the native fences, it will not
warn of stalls (in set-domain-ioctl, pread/pwrite etc) if the object is
being rendered to by external fences. It also will not report the same
busy state as wait-ioctl (or polling on the dma-buf) in the same
circumstances. On the plus side, it does retain reporting of which
*i915* engines are engaged with this object.
* non-blocking atomic modesets take a step backwards as the wait for
render completion blocks the ioctl. This is fixed in a subsequent
patch to use a fence instead for awaiting on the rendering, see
"drm/i915: Restore nonblocking awaits for modesetting"
* dynamic array manipulation for shared-fences in reservation is slower
than the previous lockless static assignment (e.g. gem_exec_lut_handle
runtime on ivb goes from 42s to 66s), mainly due to atomic operations
(maintaining the fence refcounts).
* loss of object-level retirement callbacks, emulated by VMA retirement
tracking.
* minor loss of object-level last activity information from debugfs,
could be replaced with per-vma information if desired
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-21-chris@chris-wilson.co.uk
2016-10-28 19:58:44 +07:00
|
|
|
#include <linux/reservation.h>
|
2016-04-13 23:35:01 +07:00
|
|
|
#include <linux/shmem_fs.h>
|
|
|
|
|
|
|
|
#include <drm/drmP.h>
|
|
|
|
#include <drm/intel-gtt.h>
|
|
|
|
#include <drm/drm_legacy.h> /* for struct drm_dma_handle */
|
|
|
|
#include <drm/drm_gem.h>
|
2016-06-21 15:54:22 +07:00
|
|
|
#include <drm/drm_auth.h>
|
2017-01-10 04:56:49 +07:00
|
|
|
#include <drm/drm_cache.h>
|
2016-04-13 23:35:01 +07:00
|
|
|
|
|
|
|
#include "i915_params.h"
|
|
|
|
#include "i915_reg.h"
|
2017-01-05 22:30:22 +07:00
|
|
|
#include "i915_utils.h"
|
2016-04-13 23:35:01 +07:00
|
|
|
|
|
|
|
#include "intel_bios.h"
|
2017-12-22 04:57:32 +07:00
|
|
|
#include "intel_device_info.h"
|
2017-12-22 04:57:31 +07:00
|
|
|
#include "intel_display.h"
|
2016-03-08 22:46:19 +07:00
|
|
|
#include "intel_dpll_mgr.h"
|
2016-04-13 23:35:01 +07:00
|
|
|
#include "intel_lrc.h"
|
2017-12-22 04:57:31 +07:00
|
|
|
#include "intel_opregion.h"
|
2016-04-13 23:35:01 +07:00
|
|
|
#include "intel_ringbuffer.h"
|
2017-12-22 04:57:31 +07:00
|
|
|
#include "intel_uncore.h"
|
2018-03-14 07:32:50 +07:00
|
|
|
#include "intel_wopcm.h"
|
2017-12-22 04:57:31 +07:00
|
|
|
#include "intel_uc.h"
|
2016-04-13 23:35:01 +07:00
|
|
|
|
2016-04-13 23:35:02 +07:00
|
|
|
#include "i915_gem.h"
|
2016-12-31 18:20:11 +07:00
|
|
|
#include "i915_gem_context.h"
|
2016-11-11 17:43:54 +07:00
|
|
|
#include "i915_gem_fence_reg.h"
|
|
|
|
#include "i915_gem_object.h"
|
2016-04-13 23:35:01 +07:00
|
|
|
#include "i915_gem_gtt.h"
|
2018-03-08 16:50:37 +07:00
|
|
|
#include "i915_gpu_error.h"
|
2018-02-21 16:56:36 +07:00
|
|
|
#include "i915_request.h"
|
2018-04-19 01:40:52 +07:00
|
|
|
#include "i915_scheduler.h"
|
2018-05-02 23:38:39 +07:00
|
|
|
#include "i915_timeline.h"
|
2016-11-11 17:43:54 +07:00
|
|
|
#include "i915_vma.h"
|
|
|
|
|
drm/i915: gvt: Introduce the basic architecture of GVT-g
This patch introduces the very basic framework of GVT-g device model,
includes basic prototypes, definitions, initialization.
v12:
- Call intel_gvt_init() in driver early initialization stage. (Chris)
v8:
- Remove the GVT idr and mutex in intel_gvt_host. (Joonas)
v7:
- Refine the URL link in Kconfig. (Joonas)
- Refine the introduction of GVT-g host support in Kconfig. (Joonas)
- Remove the macro GVT_ALIGN(), use round_down() instead. (Joonas)
- Make "struct intel_gvt" a data member in struct drm_i915_private.(Joonas)
- Remove {alloc, free}_gvt_device()
- Rename intel_gvt_{create, destroy}_gvt_device()
- Expose intel_gvt_init_host()
- Remove the dummy "struct intel_gvt" declaration in intel_gvt.h (Joonas)
v6:
- Refine introduction in Kconfig. (Chris)
- The exposed API functions will take struct intel_gvt * instead of
void *. (Chris/Tvrtko)
- Remove most members of struct intel_gvt_device_info. Will add them
in the device model patches.(Chris)
- Remove gvt_info() and gvt_err() in debug.h. (Chris)
- Move GVT kernel parameter into i915_params. (Chris)
- Remove include/drm/i915_gvt.h, as GVT-g will be built within i915.
- Remove the redundant struct i915_gvt *, as the functions in i915
will directly take struct intel_gvt *.
- Add more comments for reviewer.
v5:
Take Tvrtko's comments:
- Fix the misspelled words in Kconfig
- Let functions take drm_i915_private * instead of struct drm_device *
- Remove redundant prints/local variable initialization
v3:
Take Joonas' comments:
- Change file name i915_gvt.* to intel_gvt.*
- Move GVT kernel parameter into intel_gvt.c
- Remove redundant debug macros
- Change error handling style
- Add introductions for some stub functions
- Introduce drm/i915_gvt.h.
Take Kevin's comments:
- Move GVT-g host/guest check into intel_vgt_balloon in i915_gem_gtt.c
v2:
- Introduce i915_gvt.c.
It's necessary to introduce the stubs between i915 driver and GVT-g host,
as GVT-g components is configurable in kernel config. When disabled, the
stubs here do nothing.
Take Joonas' comments:
- Replace boolean return value with int.
- Replace customized info/warn/debug macros with DRM macros.
- Document all non-static functions like i915.
- Remove empty and unused functions.
- Replace magic number with macros.
- Set GVT-g in kernel config to "n" by default.
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1466078825-6662-5-git-send-email-zhi.a.wang@intel.com
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
2016-06-16 19:07:00 +07:00
|
|
|
#include "intel_gvt.h"
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* General customization:
|
|
|
|
*/
|
|
|
|
|
|
|
|
#define DRIVER_NAME "i915"
|
|
|
|
#define DRIVER_DESC "Intel Graphics"
|
2018-09-06 19:14:18 +07:00
|
|
|
#define DRIVER_DATE "20180906"
|
2018-09-06 20:54:43 +07:00
|
|
|
#define DRIVER_TIMESTAMP 1536242083
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2014-12-16 01:56:32 +07:00
|
|
|
/* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
|
|
|
|
* WARN_ON()) for hw state sanity checks to check for unexpected conditions
|
|
|
|
* which may not necessarily be a user visible problem. This will either
|
|
|
|
* WARN() or DRM_ERROR() depending on the verbose_state_checks moduleparam, to
|
|
|
|
* enable distros and users to tailor their preferred amount of i915 abrt
|
|
|
|
* spam.
|
|
|
|
*/
|
|
|
|
/*
 * Evaluates @condition exactly once. When it is true the message is reported
 * through WARN() if i915_modparams.verbose_state_checks is set, and through
 * DRM_ERROR() otherwise. The whole expression yields the truth value of
 * @condition, so it can be used directly as an if () condition.
 */
#define I915_STATE_WARN(condition, format...) ({			\
	int __ret_warn_on = !!(condition);				\
	if (unlikely(__ret_warn_on) &&					\
	    !WARN(i915_modparams.verbose_state_checks, format)) {	\
		DRM_ERROR(format);					\
	}								\
	unlikely(__ret_warn_on);					\
})
|
|
|
|
|
2015-12-18 19:27:27 +07:00
|
|
|
/*
 * Condition-only form of I915_STATE_WARN(): the reported message is the
 * stringified expression wrapped in "WARN_ON(...)".
 */
#define I915_STATE_WARN_ON(x)						\
	I915_STATE_WARN((x), "%s", "WARN_ON(" __stringify(x) ")")
|
2014-10-28 22:32:30 +07:00
|
|
|
|
2018-02-02 00:32:48 +07:00
|
|
|
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
|
2018-06-09 18:10:58 +07:00
|
|
|
|
2016-03-16 18:39:08 +07:00
|
|
|
bool __i915_inject_load_failure(const char *func, int line);
|
|
|
|
#define i915_inject_load_failure() \
|
|
|
|
__i915_inject_load_failure(__func__, __LINE__)
|
2018-06-09 18:10:58 +07:00
|
|
|
|
|
|
|
bool i915_error_injected(void);
|
|
|
|
|
2018-02-02 00:32:48 +07:00
|
|
|
#else
|
2018-06-09 18:10:58 +07:00
|
|
|
|
2018-02-02 00:32:48 +07:00
|
|
|
#define i915_inject_load_failure() false
|
2018-06-09 18:10:58 +07:00
|
|
|
#define i915_error_injected() false
|
|
|
|
|
2018-02-02 00:32:48 +07:00
|
|
|
#endif
|
2016-03-16 18:39:08 +07:00
|
|
|
|
2018-06-09 18:10:58 +07:00
|
|
|
/*
 * Print a load-time error for @i915. The message is logged at KERN_DEBUG when
 * i915_error_injected() reports true, and at KERN_ERR otherwise.
 */
#define i915_load_error(i915, fmt, ...)					\
	__i915_printk(i915,						\
		      i915_error_injected() ? KERN_DEBUG : KERN_ERR,	\
		      fmt, ##__VA_ARGS__)
|
|
|
|
|
2016-12-01 22:49:37 +07:00
|
|
|
/*
 * Unsigned 16.16 fixed point number: the helpers below treat the upper 16 bits
 * of @val as the integer part and the lower 16 bits as the fraction. Wrapped
 * in a struct so it cannot be mixed up with a plain integer by accident.
 */
typedef struct {
	uint32_t val;	/* raw 16.16 representation */
} uint_fixed_16_16_t;
|
|
|
|
|
|
|
|
/* Largest representable 16.16 value: every bit of the raw word set. */
#define FP_16_16_MAX ((uint_fixed_16_16_t){ .val = UINT_MAX })
|
|
|
|
|
2017-05-17 18:58:29 +07:00
|
|
|
/* Returns true when the raw 16.16 value is exactly zero. */
static inline bool is_fixed16_zero(uint_fixed_16_16_t val)
{
	return val.val == 0;
}
|
|
|
|
|
2017-07-05 21:31:46 +07:00
|
|
|
static inline uint_fixed_16_16_t u32_to_fixed16(uint32_t val)
|
2016-12-01 22:49:37 +07:00
|
|
|
{
|
|
|
|
uint_fixed_16_16_t fp;
|
|
|
|
|
2017-08-17 20:45:22 +07:00
|
|
|
WARN_ON(val > U16_MAX);
|
2016-12-01 22:49:37 +07:00
|
|
|
|
|
|
|
fp.val = val << 16;
|
|
|
|
return fp;
|
|
|
|
}
|
|
|
|
|
2017-07-05 21:31:46 +07:00
|
|
|
/*
 * Convert a 16.16 fixed point value to an integer, rounding up.
 *
 * The integer part is taken with a shift and bumped by one when any of the 16
 * fractional bits is set. Unlike DIV_ROUND_UP(fp.val, 1 << 16) this never adds
 * to the 32-bit raw value, so inputs above 0xffff0000 (FP_16_16_MAX included)
 * no longer wrap around and come out as 0; they yield 0x10000 as expected.
 */
static inline uint32_t fixed16_to_u32_round_up(uint_fixed_16_16_t fp)
{
	return (fp.val >> 16) + ((fp.val & 0xffff) != 0);
}
|
|
|
|
|
2017-07-05 21:31:46 +07:00
|
|
|
/* Convert 16.16 fixed point to an integer, discarding the fractional bits. */
static inline uint32_t fixed16_to_u32(uint_fixed_16_16_t fp)
{
	const uint32_t integer_part = fp.val >> 16;

	return integer_part;
}
|
|
|
|
|
2017-07-05 21:31:46 +07:00
|
|
|
/* Returns whichever of the two 16.16 values has the smaller raw value. */
static inline uint_fixed_16_16_t min_fixed16(uint_fixed_16_16_t min1,
					     uint_fixed_16_16_t min2)
{
	return min1.val < min2.val ? min1 : min2;
}
|
|
|
|
|
2017-07-05 21:31:46 +07:00
|
|
|
/* Returns whichever of the two 16.16 values has the larger raw value. */
static inline uint_fixed_16_16_t max_fixed16(uint_fixed_16_16_t max1,
					     uint_fixed_16_16_t max2)
{
	return max1.val > max2.val ? max1 : max2;
}
|
|
|
|
|
2017-07-05 21:31:44 +07:00
|
|
|
static inline uint_fixed_16_16_t clamp_u64_to_fixed16(uint64_t val)
|
|
|
|
{
|
|
|
|
uint_fixed_16_16_t fp;
|
2017-08-17 20:45:22 +07:00
|
|
|
WARN_ON(val > U32_MAX);
|
|
|
|
fp.val = (uint32_t) val;
|
2017-07-05 21:31:44 +07:00
|
|
|
return fp;
|
|
|
|
}
|
|
|
|
|
2017-05-17 18:58:21 +07:00
|
|
|
/*
 * Divide one 16.16 value by another and return the integer quotient, rounded
 * up. Both operands carry the same scale, so the raw words can be divided
 * directly.
 *
 * Computed as quotient plus "remainder is non-zero" rather than
 * DIV_ROUND_UP(val.val, d.val): the latter evaluates val.val + d.val - 1 in
 * 32 bits, which wraps for large operands and produces a far too small result.
 *
 * @d must be non-zero; this helper does not check for it.
 */
static inline uint32_t div_round_up_fixed16(uint_fixed_16_16_t val,
					    uint_fixed_16_16_t d)
{
	return val.val / d.val + (val.val % d.val != 0);
}
|
|
|
|
|
|
|
|
static inline uint32_t mul_round_up_u32_fixed16(uint32_t val,
|
|
|
|
uint_fixed_16_16_t mul)
|
|
|
|
{
|
|
|
|
uint64_t intermediate_val;
|
|
|
|
|
|
|
|
intermediate_val = (uint64_t) val * mul.val;
|
|
|
|
intermediate_val = DIV_ROUND_UP_ULL(intermediate_val, 1 << 16);
|
2017-08-17 20:45:22 +07:00
|
|
|
WARN_ON(intermediate_val > U32_MAX);
|
|
|
|
return (uint32_t) intermediate_val;
|
2017-05-17 18:58:21 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Multiply two 16.16 values. The 64-bit product carries 32 fractional bits,
 * so it is shifted right by 16 before being narrowed back to 16.16.
 */
static inline uint_fixed_16_16_t mul_fixed16(uint_fixed_16_16_t val,
					     uint_fixed_16_16_t mul)
{
	const uint64_t product = (uint64_t)val.val * mul.val;

	return clamp_u64_to_fixed16(product >> 16);
}
|
|
|
|
|
2017-07-05 21:31:46 +07:00
|
|
|
/*
 * Divide two integers and return the quotient as a 16.16 value, rounded up at
 * the last fractional bit. The dividend is pre-scaled by 2^16 in 64 bits.
 * @d must be non-zero; this helper does not check for it.
 */
static inline uint_fixed_16_16_t div_fixed16(uint32_t val, uint32_t d)
{
	uint64_t scaled = (uint64_t)val << 16;

	scaled = DIV_ROUND_UP_ULL(scaled, d);

	return clamp_u64_to_fixed16(scaled);
}
|
|
|
|
|
2017-05-17 18:58:21 +07:00
|
|
|
static inline uint32_t div_round_up_u32_fixed16(uint32_t val,
|
|
|
|
uint_fixed_16_16_t d)
|
|
|
|
{
|
|
|
|
uint64_t interm_val;
|
|
|
|
|
|
|
|
interm_val = (uint64_t)val << 16;
|
|
|
|
interm_val = DIV_ROUND_UP_ULL(interm_val, d.val);
|
2017-08-17 20:45:22 +07:00
|
|
|
WARN_ON(interm_val > U32_MAX);
|
|
|
|
return (uint32_t) interm_val;
|
2017-05-17 18:58:21 +07:00
|
|
|
}
|
|
|
|
|
2017-07-05 21:31:46 +07:00
|
|
|
/*
 * Multiply an integer by a 16.16 value, yielding a 16.16 result. The raw
 * product already has the 16.16 scale, so it is only narrowed from 64 bits.
 */
static inline uint_fixed_16_16_t mul_u32_fixed16(uint32_t val,
						 uint_fixed_16_16_t mul)
{
	return clamp_u64_to_fixed16((uint64_t)val * mul.val);
}
|
|
|
|
|
2017-07-05 21:31:47 +07:00
|
|
|
/* Add two 16.16 values; the sum is formed in 64 bits and then narrowed. */
static inline uint_fixed_16_16_t add_fixed16(uint_fixed_16_16_t add1,
					     uint_fixed_16_16_t add2)
{
	return clamp_u64_to_fixed16((uint64_t)add1.val + add2.val);
}
|
|
|
|
|
|
|
|
/*
 * Add an integer to a 16.16 value. The integer is first converted with
 * u32_to_fixed16() (which warns if it exceeds U16_MAX) and the two 16.16
 * values are then summed in 64 bits and narrowed.
 */
static inline uint_fixed_16_16_t add_fixed16_u32(uint_fixed_16_16_t add1,
						 uint32_t add2)
{
	return add_fixed16(add1, u32_to_fixed16(add2));
}
|
|
|
|
|
2013-02-26 00:06:49 +07:00
|
|
|
/*
 * Hotplug-detect pin identifiers. HPD_NONE is 0 and HPD_TV aliases it, so the
 * first real pin (HPD_CRT) is 1. HPD_NUM_PINS is the array-size sentinel.
 */
enum hpd_pin {
	HPD_NONE = 0,
	HPD_TV = HPD_NONE,	/* TV is known to be unreliable */
	HPD_CRT,
	HPD_SDVO_B,
	HPD_SDVO_C,
	HPD_PORT_A,
	HPD_PORT_B,
	HPD_PORT_C,
	HPD_PORT_D,
	HPD_PORT_E,
	HPD_PORT_F,
	HPD_NUM_PINS
};
|
|
|
|
|
2015-05-28 19:43:48 +07:00
|
|
|
/* Iterate @__pin over every pin after HPD_NONE, up to HPD_NUM_PINS - 1. */
#define for_each_hpd_pin(__pin)						\
	for ((__pin) = (HPD_NONE + 1);					\
	     (__pin) < HPD_NUM_PINS;					\
	     (__pin)++)
|
|
|
|
|
2017-02-04 09:18:25 +07:00
|
|
|
/*
 * Default HPD storm threshold.
 * NOTE(review): presumably the initial value of
 * i915_hotplug.hpd_storm_threshold - confirm against the hotplug code.
 */
#define HPD_STORM_DEFAULT_THRESHOLD 5
|
|
|
|
|
2015-05-27 19:03:42 +07:00
|
|
|
struct i915_hotplug {
|
|
|
|
struct work_struct hotplug_work;
|
|
|
|
|
|
|
|
struct {
|
|
|
|
unsigned long last_jiffies;
|
|
|
|
int count;
|
|
|
|
enum {
|
|
|
|
HPD_ENABLED = 0,
|
|
|
|
HPD_DISABLED = 1,
|
|
|
|
HPD_MARK_DISABLED = 2
|
|
|
|
} state;
|
|
|
|
} stats[HPD_NUM_PINS];
|
|
|
|
u32 event_bits;
|
|
|
|
struct delayed_work reenable_work;
|
|
|
|
|
|
|
|
u32 long_port_mask;
|
|
|
|
u32 short_port_mask;
|
|
|
|
struct work_struct dig_port_work;
|
|
|
|
|
2016-06-22 04:03:44 +07:00
|
|
|
struct work_struct poll_init_work;
|
|
|
|
bool poll_enabled;
|
|
|
|
|
2017-02-04 09:18:25 +07:00
|
|
|
unsigned int hpd_storm_threshold;
|
|
|
|
|
2015-05-27 19:03:42 +07:00
|
|
|
/*
|
|
|
|
* if we get a HPD irq from DP and a HPD irq from non-DP
|
|
|
|
* the non-DP HPD could block the workqueue on a mode config
|
|
|
|
* mutex getting, that userspace may have taken. However
|
|
|
|
* userspace is waiting on the DP workqueue to run which is
|
|
|
|
* blocked behind the non-DP one.
|
|
|
|
*/
|
|
|
|
struct workqueue_struct *dp_wq;
|
|
|
|
};
|
|
|
|
|
2012-12-03 18:49:06 +07:00
|
|
|
/* Bitwise OR of the render, sampler, command, instruction and vertex domains. */
#define I915_GEM_GPU_DOMAINS			\
	(I915_GEM_DOMAIN_RENDER		|	\
	 I915_GEM_DOMAIN_SAMPLER	|	\
	 I915_GEM_DOMAIN_COMMAND	|	\
	 I915_GEM_DOMAIN_INSTRUCTION	|	\
	 I915_GEM_DOMAIN_VERTEX)
|
2010-05-22 03:26:39 +07:00
|
|
|
|
2013-06-05 18:34:14 +07:00
|
|
|
struct drm_i915_private;
|
2014-08-07 20:20:40 +07:00
|
|
|
struct i915_mm_struct;
|
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
By exporting the ability to map user address and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forwarding porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifier
with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
rearrange error path to destroy the mmu_notifier locklessly.
Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
allocations of the same userptr range - and notice that
struct_mutex was presumed to be held when during creation it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the
hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required
infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to
be used to fix up the kernel thread's current->mm for use with
copy_user().
v22: Use sg_alloc_table_from_pages for that chunky feeling
v23: Export a function for sanity checking dma-buf rather than encode
userptr details elsewhere, and clean up comments based on
suggestions by Bradley.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com>
[danvet: Frob ioctl allocation to pick the next one - will cause a bit
of fuss with create2 apparently, but such are the rules.]
[danvet2: oops, forgot to git add after manual patch application]
[danvet3: Appease sparse.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 20:22:37 +07:00
|
|
|
struct i915_mmu_object;
|
2013-06-05 18:34:14 +07:00
|
|
|
|
2015-04-27 19:41:20 +07:00
|
|
|
/*
 * Driver-private bookkeeping kept alongside a struct drm_file. It holds
 * pointers to the device private data (dev_priv) and to a struct drm_file
 * (file), a lock-protected request list (mm), an idr of contexts, an RPS
 * boost counter, a BSD engine selector and the client ban-score state.
 */
struct drm_i915_file_private {
|
|
|
|
struct drm_i915_private *dev_priv;
|
|
|
|
struct drm_file *file;
|
|
|
|
|
|
|
|
|
|
struct {
|
|
|
|
spinlock_t lock;
|
|
|
|
struct list_head request_list;
|
2015-05-22 03:01:48 +07:00
|
|
|
/* 20ms is a fairly arbitrary limit (greater than the average frame time)
|
|
|
|
* chosen to prevent the CPU getting more than a frame ahead of the GPU
|
|
|
|
* (when using lax throttling for the frontbuffer). We also use it to
|
|
|
|
* offer free GPU waitboosts for severely congested workloads.
|
|
|
|
*/
|
|
|
|
#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
|
2015-04-27 19:41:20 +07:00
|
|
|
} mm;
|
|
|
|
struct idr context_idr;
|
|
|
|
|
2015-04-27 19:41:22 +07:00
|
|
|
struct intel_rps_client {
|
2017-06-28 19:35:48 +07:00
|
|
|
atomic_t boosts;
|
2017-10-11 04:30:06 +07:00
|
|
|
} rps_client;
|
2015-04-27 19:41:20 +07:00
|
|
|
|
2016-07-27 15:07:27 +07:00
|
|
|
unsigned int bsd_engine;
|
2016-11-18 20:10:47 +07:00
|
|
|
|
2018-06-15 17:44:29 +07:00
|
|
|
/*
|
|
|
|
* Every context ban increments the per-client ban score. Hangs in
|
|
|
|
* short succession also increment the ban score. If the ban threshold
|
|
|
|
* is reached, the client is considered banned and submitting more work
|
|
|
|
* will fail. This is a stop-gap measure to limit a badly behaving
|
|
|
|
* client's access to the GPU. Note that unbannable contexts never increment
|
|
|
|
* the client ban score.
|
2016-11-18 20:10:47 +07:00
|
|
|
*/
|
2018-06-15 17:44:29 +07:00
|
|
|
/*
 * NOTE(review): presumably SCORE_HANG_FAST and SCORE_CONTEXT_BAN are the
 * increments applied for a fast hang and a context ban, SCORE_BANNED is the
 * ban threshold, and FAST_HANG_JIFFIES (60 * HZ) is the window that defines
 * "short succession" -- confirm against the code that uses these macros.
 */
#define I915_CLIENT_SCORE_HANG_FAST 1
|
|
|
|
#define I915_CLIENT_FAST_HANG_JIFFIES (60 * HZ)
|
|
|
|
#define I915_CLIENT_SCORE_CONTEXT_BAN 3
|
|
|
|
#define I915_CLIENT_SCORE_BANNED 9
|
|
|
|
/** ban_score: Accumulated score of all ctx bans and fast hangs. */
|
|
|
|
atomic_t ban_score;
|
|
|
|
/* NOTE(review): presumably the jiffies value of the most recent hang -- confirm. */
unsigned long hang_timestamp;
|
2015-04-27 19:41:20 +07:00
|
|
|
};
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Interface history:
|
|
|
|
*
|
|
|
|
* 1.1: Original.
|
2006-01-02 16:14:23 +07:00
|
|
|
* 1.2: Add Power Management
|
|
|
|
* 1.3: Add vblank support
|
2006-01-25 11:31:43 +07:00
|
|
|
* 1.4: Fix cmdbuffer path, add heap destroy
|
2006-06-24 14:07:34 +07:00
|
|
|
* 1.5: Add vblank pipe configuration
|
2006-10-24 22:05:09 +07:00
|
|
|
* 1.6: - New ioctl for scheduling buffer swaps on vertical blank
|
|
|
|
* - Support vertical blank on secondary display pipe
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
|
|
|
/*
 * Driver interface version triple: major 1, minor 6 (the newest entry in the
 * interface history comment above), patchlevel 0.
 */
#define DRIVER_MAJOR 1
|
2006-10-24 22:05:09 +07:00
|
|
|
#define DRIVER_MINOR 6
|
2005-04-17 05:20:36 +07:00
|
|
|
#define DRIVER_PATCHLEVEL 0
|
|
|
|
|
2010-08-05 02:26:07 +07:00
|
|
|
struct intel_overlay;
|
|
|
|
struct intel_overlay_error_state;
|
|
|
|
|
2009-05-31 16:17:17 +07:00
|
|
|
/*
 * One SDVO device mapping record. Every field is a single byte.
 * NOTE(review): the field names suggest a DVO port, an I2C slave address, a
 * wiring selector and I2C/DDC pin selectors, with `initialized` marking a
 * populated entry -- the encodings are not visible here; confirm against
 * the code that fills this struct in.
 */
struct sdvo_device_mapping {
|
2010-09-24 18:52:03 +07:00
|
|
|
u8 initialized;
|
2009-05-31 16:17:17 +07:00
|
|
|
u8 dvo_port;
|
|
|
|
u8 slave_addr;
|
|
|
|
u8 dvo_wiring;
|
2010-09-24 18:52:03 +07:00
|
|
|
u8 i2c_pin;
|
2010-04-24 03:07:40 +07:00
|
|
|
u8 ddc_pin;
|
2009-05-31 16:17:17 +07:00
|
|
|
};
|
|
|
|
|
2013-11-08 21:48:56 +07:00
|
|
|
struct intel_connector;
|
2014-10-27 21:26:47 +07:00
|
|
|
struct intel_encoder;
|
2016-11-08 19:55:32 +07:00
|
|
|
struct intel_atomic_state;
|
2015-01-15 19:55:21 +07:00
|
|
|
struct intel_crtc_state;
|
2015-01-20 19:51:52 +07:00
|
|
|
struct intel_initial_plane_config;
|
2013-03-28 16:42:00 +07:00
|
|
|
struct intel_crtc;
|
2013-06-04 03:40:22 +07:00
|
|
|
struct intel_limit;
|
|
|
|
struct dpll;
|
2017-02-08 01:33:45 +07:00
|
|
|
struct intel_cdclk_state;
|
2013-03-27 06:44:50 +07:00
|
|
|
|
2009-09-22 00:42:27 +07:00
|
|
|
/*
 * Table of display-related function pointers. Every member is a callback
 * that takes i915/intel display objects (device private data, crtc, crtc
 * state, atomic state, encoder). Where these hooks are installed and
 * invoked is not visible in this part of the header.
 */
struct drm_i915_display_funcs {
|
2017-02-08 01:33:45 +07:00
|
|
|
void (*get_cdclk)(struct drm_i915_private *dev_priv,
|
|
|
|
struct intel_cdclk_state *cdclk_state);
|
2017-01-27 02:52:01 +07:00
|
|
|
void (*set_cdclk)(struct drm_i915_private *dev_priv,
|
|
|
|
const struct intel_cdclk_state *cdclk_state);
|
2017-11-18 02:19:11 +07:00
|
|
|
int (*get_fifo_size)(struct drm_i915_private *dev_priv,
|
|
|
|
enum i9xx_plane_id i9xx_plane);
|
2016-03-01 17:07:22 +07:00
|
|
|
int (*compute_pipe_wm)(struct intel_crtc_state *cstate);
|
drm/i915: Add two-stage ILK-style watermark programming (v11)
In addition to calculating final watermarks, let's also pre-calculate a
set of intermediate watermark values at atomic check time. These
intermediate watermarks are a combination of the watermarks for the old
state and the new state; they should satisfy the requirements of both
states which means they can be programmed immediately when we commit the
atomic state (without waiting for a vblank). Once the vblank does
happen, we can then re-program watermarks to the more optimal final
value.
v2: Significant rebasing/rewriting.
v3:
- Move 'need_postvbl_update' flag to CRTC state (Daniel)
- Don't forget to check intermediate watermark values for validity
(Maarten)
- Don't due async watermark optimization; just do it at the end of the
atomic transaction, after waiting for vblanks. We do want it to be
async eventually, but adding that now will cause more trouble for
Maarten's in-progress work. (Maarten)
- Don't allocate space in crtc_state for intermediate watermarks on
platforms that don't need it (gen9+).
- Move WaCxSRDisabledForSpriteScaling:ivb into intel_begin_crtc_commit
now that ilk_update_wm is gone.
v4:
- Add a wm_mutex to cover updates to intel_crtc->active and the
need_postvbl_update flag. Since we don't have async yet it isn't
terribly important yet, but might as well add it now.
- Change interface to program watermarks. Platforms will now expose
.initial_watermarks() and .optimize_watermarks() functions to do
watermark programming. These should lock wm_mutex, copy the
appropriate state values into intel_crtc->active, and then call
the internal program watermarks function.
v5:
- Skip intermediate watermark calculation/check during initial hardware
readout since we don't trust the existing HW values (and don't have
valid values of our own yet).
- Don't try to call .optimize_watermarks() on platforms that don't have
atomic watermarks yet. (Maarten)
v6:
- Rebase
v7:
- Further rebase
v8:
- A few minor indentation and line length fixes
v9:
- Yet another rebase since Maarten's patches reworked a bunch of the
code (wm_pre, wm_post, etc.) that this was previously based on.
v10:
- Move wm_mutex to dev_priv to protect against racing commits against
disjoint CRTC sets. (Maarten)
- Drop unnecessary clearing of cstate->wm.need_postvbl_update (Maarten)
v11:
- Now that we've moved to atomic watermark updates, make sure we call
the proper function to program watermarks in
{ironlake,haswell}_crtc_enable(); the failure to do so on the
previous patch iteration led to us not actually programming the
watermarks before turning on the CRTC, which was the cause of the
underruns that the CI system was seeing.
- Fix inverted logic for determining when to optimize watermarks. We
were needlessly optimizing when the intermediate/optimal values were
the same (harmless), but not actually optimizing when they differed
(also harmless, but wasteful from a power/bandwidth perspective).
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1456276813-5689-1-git-send-email-matthew.d.roper@intel.com
2016-02-24 08:20:13 +07:00
|
|
|
int (*compute_intermediate_wm)(struct drm_device *dev,
|
|
|
|
struct intel_crtc *intel_crtc,
|
|
|
|
struct intel_crtc_state *newstate);
|
2016-11-08 19:55:32 +07:00
|
|
|
void (*initial_watermarks)(struct intel_atomic_state *state,
|
|
|
|
struct intel_crtc_state *cstate);
|
|
|
|
void (*atomic_update_watermarks)(struct intel_atomic_state *state,
|
|
|
|
struct intel_crtc_state *cstate);
|
|
|
|
void (*optimize_watermarks)(struct intel_atomic_state *state,
|
|
|
|
struct intel_crtc_state *cstate);
|
2016-05-12 21:06:03 +07:00
|
|
|
int (*compute_global_watermarks)(struct drm_atomic_state *state);
|
2016-11-01 03:37:03 +07:00
|
|
|
void (*update_wm)(struct intel_crtc *crtc);
|
2015-06-15 17:33:56 +07:00
|
|
|
int (*modeset_calc_cdclk)(struct drm_atomic_state *state);
|
2013-03-28 16:42:00 +07:00
|
|
|
/* Returns the active state of the crtc, and if the crtc is active,
|
|
|
|
* fills out the pipe-config with the hw state. */
|
|
|
|
bool (*get_pipe_config)(struct intel_crtc *,
|
2015-01-15 19:55:21 +07:00
|
|
|
struct intel_crtc_state *);
|
2015-01-20 19:51:52 +07:00
|
|
|
void (*get_initial_plane_config)(struct intel_crtc *,
|
|
|
|
struct intel_initial_plane_config *);
|
2015-01-15 19:55:23 +07:00
|
|
|
int (*crtc_compute_clock)(struct intel_crtc *crtc,
|
|
|
|
struct intel_crtc_state *crtc_state);
|
2016-08-09 22:04:01 +07:00
|
|
|
void (*crtc_enable)(struct intel_crtc_state *pipe_config,
|
|
|
|
struct drm_atomic_state *old_state);
|
|
|
|
void (*crtc_disable)(struct intel_crtc_state *old_crtc_state,
|
|
|
|
struct drm_atomic_state *old_state);
|
2017-09-04 17:48:33 +07:00
|
|
|
void (*update_crtcs)(struct drm_atomic_state *state);
|
2017-10-31 01:46:53 +07:00
|
|
|
void (*audio_codec_enable)(struct intel_encoder *encoder,
|
|
|
|
const struct intel_crtc_state *crtc_state,
|
|
|
|
const struct drm_connector_state *conn_state);
|
|
|
|
void (*audio_codec_disable)(struct intel_encoder *encoder,
|
|
|
|
const struct intel_crtc_state *old_crtc_state,
|
|
|
|
const struct drm_connector_state *old_conn_state);
|
2017-03-02 19:58:54 +07:00
|
|
|
void (*fdi_link_train)(struct intel_crtc *crtc,
|
|
|
|
const struct intel_crtc_state *crtc_state);
|
2016-11-01 03:37:22 +07:00
|
|
|
void (*init_clock_gating)(struct drm_i915_private *dev_priv);
|
2016-05-06 20:48:28 +07:00
|
|
|
void (*hpd_irq_setup)(struct drm_i915_private *dev_priv);
|
2009-09-22 00:42:27 +07:00
|
|
|
/*
 * NOTE(review): the five comments that follow have no members attached to
 * them in this struct; they look like leftover section headings -- confirm
 * and consider removing them.
 */
/* clock updates for mode set */
|
|
|
|
/* cursor updates */
|
|
|
|
/* render clock increase/decrease */
|
|
|
|
/* display clock increase/decrease */
|
|
|
|
/* pll clock increase/decrease */
|
2016-03-16 17:57:14 +07:00
|
|
|
|
2016-03-30 22:16:34 +07:00
|
|
|
void (*load_csc_matrix)(struct drm_crtc_state *crtc_state);
|
|
|
|
void (*load_luts)(struct drm_crtc_state *crtc_state);
|
2009-09-22 00:42:27 +07:00
|
|
|
};
|
|
|
|
|
2015-10-27 19:46:59 +07:00
|
|
|
/*
 * CSR version helpers. CSR_VERSION() packs a (major, minor) pair into one
 * integer: major is shifted left by 16 bits and minor is OR-ed into the low
 * bits. CSR_VERSION_MAJOR() and CSR_VERSION_MINOR() extract the upper part
 * and the low 16 bits again. CSR_VERSION() does not mask minor, so a minor
 * value above 0xffff would spill into the major field.
 */
#define CSR_VERSION(major, minor) ((major) << 16 | (minor))
|
|
|
|
#define CSR_VERSION_MAJOR(version) ((version) >> 16)
|
|
|
|
#define CSR_VERSION_MINOR(version) ((version) & 0xffff)
|
|
|
|
|
drm/i915/skl: Add support to load SKL CSR firmware.
Display Context Save and Restore support is needed for
various SKL Display C states like DC5, DC6.
This implementation is added based on first version of DMC CSR program
that we received from h/w team.
Here we are using request_firmware based design.
Finally this firmware should end up in linux-firmware tree.
For SKL platform its mandatory to ensure that we load this
csr program before enabling DC states like DC5/DC6.
As CSR program gets reset on various conditions, we should ensure
to load it during boot and in future change to be added to load
this system resume sequence too.
v1: Initial release as RFC patch
v2: Design change as per Daniel, Damien and Shobit's review comments
request firmware method followed.
v3: Some optimization and functional changes.
Pulled register defines into drivers/gpu/drm/i915/i915_reg.h
Used kmemdup to allocate and duplicate firmware content.
Ensured to free allocated buffer.
v4: Modified as per review comments from Satheesh and Daniel
Removed temporary buffer.
Optimized number of writes by replacing I915_WRITE with I915_WRITE64.
v5:
Modified as per review comments from Damien.
- Changed name for functions and firmware.
- Introduced HAS_CSR.
- Reverted back previous change and used csr_buf with u8 size.
- Using cpu_to_be64 for endianness change.
Modified as per review comments from Imre.
- Modified registers and macro names to be a bit closer to bspec terminology
and the existing register naming in the driver.
- Early return for non SKL platforms in intel_load_csr_program function.
- Added locking around CSR program load function as it may be called
concurrently during system/runtime resume.
- Releasing the fw before loading the program for consistency
- Handled error path during f/w load.
v6: Modified as per review comments from Imre.
- Corrected out_freecsr sequence.
v7: Modified as per review comments from Imre.
Fail loading fw if fw->size%8!=0.
v8: Rebase to latest.
v9: Rebase on top of -nightly (Damien)
v10: Enabled support for dmc firmware ver 1.0.
According to ver 1.0 in a single binary package all the firmware's that are
required for different stepping's of the product will be stored. The package
contains the css header, followed by the package header and the actual dmc
firmwares. Package header contains the firmware/stepping mapping table and
the corresponding firmware offsets to the individual binaries, within the
package. Each individual program binary contains the header and the payload
sections whose size is specified in the header section. These changes are done
to extract the specific firmware from the package. (Animesh)
v11: Modified as per review comments from Imre.
- Added code comment from bspec for header structure elements.
- Added __packed to avoid structure padding.
- Added helper functions for stepping and substepping info.
- Added code comment for CSR_MAX_FW_SIZE.
- Disabled BXT firmware loading, will be enabled with dmc 1.0 support.
- Changed skl_stepping_info based on bspec, earlier used from config DB.
- Removed duplicate call of cpu_to_be* from intel_csr_load_program function.
- Used cpu_to_be32 instead of cpu_to_be64 as firmware binary in dword aligned.
- Added sanity check for header length.
- Added sanity check for mmio address got from firmware binary.
- kmalloc done separately for dmc header and dmc firmware. (Animesh)
v12: Modified as per review comments from Imre.
- Corrected the typo error in skl stepping info structure.
- Added out-of-bound access for skl_stepping_info.
- Sanity check for mmio address modified.
- Sanity check added for stepping and substepping.
- Modified the intel_dmc_info structure, cache only the required header info. (Animesh)
v13: clarify firmware load error message.
The reason for a firmware loading failure can be obscure if the driver
is built-in. Provide an explanation to the user about the likely reason for
the failure and how to resolve it. (Imre)
v14: Suggested by Jani.
- fix s/I915/CONFIG_DRM_I915/ typo
- add fw_path to the firmware object instead of using a static ptr (Jani)
v15:
1) Changed the firmware name as dmc_gen9.bin, everytime for a new firmware version a symbolic link
with same name will help not to build kernel again.
2) Changes done as per review comments from Imre.
- Error check removed for intel_csr_ucode_init.
- Moved csr-specific data structure to intel_csr.h and optimization done on structure definition.
- fw->data used directly for parsing the header info & memory allocation
only done separately for payload. (Animesh)
v16:
- No need for out_regs label in i915_driver_load(), so removed it.
- Changed the firmware name as skl_dmc_ver1.bin, followed naming convention <platform>_dmc_<api-version>.bin (Animesh)
Issue: VIZ-2569
Signed-off-by: A.Sunil Kamath <sunil.kamath@intel.com>
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-05-04 19:58:44 +07:00
|
|
|
struct intel_csr {
|
2015-10-29 04:59:04 +07:00
|
|
|
struct work_struct work;
|
drm/i915/skl: Add support to load SKL CSR firmware.
Display Context Save and Restore support is needed for
various SKL Display C states like DC5, DC6.
This implementation is added based on first version of DMC CSR program
that we received from h/w team.
Here we are using request_firmware based design.
Finally this firmware should end up in linux-firmware tree.
For SKL platform its mandatory to ensure that we load this
csr program before enabling DC states like DC5/DC6.
As CSR program gets reset on various conditions, we should ensure
to load it during boot and in future change to be added to load
this system resume sequence too.
v1: Initial release as RFC patch
v2: Design change as per Daniel, Damien and Shobit's review comments
request firmware method followed.
v3: Some optimization and functional changes.
Pulled register defines into drivers/gpu/drm/i915/i915_reg.h
Used kmemdup to allocate and duplicate firmware content.
Ensured to free allocated buffer.
v4: Modified as per review comments from Satheesh and Daniel
Removed temporary buffer.
Optimized number of writes by replacing I915_WRITE with I915_WRITE64.
v5:
Modified as per review comments from Damien.
- Changed name for functions and firmware.
- Introduced HAS_CSR.
- Reverted back previous change and used csr_buf with u8 size.
- Using cpu_to_be64 for endianness change.
Modified as per review comments from Imre.
- Modified registers and macro names to be a bit closer to bspec terminology
and the existing register naming in the driver.
- Early return for non SKL platforms in intel_load_csr_program function.
- Added locking around CSR program load function as it may be called
concurrently during system/runtime resume.
- Releasing the fw before loading the program for consistency
- Handled error path during f/w load.
v6: Modified as per review comments from Imre.
- Corrected out_freecsr sequence.
v7: Modified as per review comments from Imre.
Fail loading fw if fw->size%8!=0.
v8: Rebase to latest.
v9: Rebase on top of -nightly (Damien)
v10: Enabled support for dmc firmware ver 1.0.
According to ver 1.0 in a single binary package all the firmware's that are
required for different stepping's of the product will be stored. The package
contains the css header, followed by the package header and the actual dmc
firmwares. Package header contains the firmware/stepping mapping table and
the corresponding firmware offsets to the individual binaries, within the
package. Each individual program binary contains the header and the payload
sections whose size is specified in the header section. These changes are done
to extract the specific firmware from the package. (Animesh)
v11: Modified as per review comments from Imre.
- Added code comment from bspec for header structure elements.
- Added __packed to avoid structure padding.
- Added helper functions for stepping and substepping info.
- Added code comment for CSR_MAX_FW_SIZE.
- Disabled BXT firmware loading, will be enabled with dmc 1.0 support.
- Changed skl_stepping_info based on bspec, earlier used from config DB.
- Removed duplicate call of cpu_to_be* from intel_csr_load_program function.
- Used cpu_to_be32 instead of cpu_to_be64 as firmware binary in dword aligned.
- Added sanity check for header length.
- Added sanity check for mmio address got from firmware binary.
- kmalloc done separately for dmc header and dmc firmware. (Animesh)
v12: Modified as per review comments from Imre.
- Corrected the typo error in skl stepping info structure.
- Added out-of-bound access for skl_stepping_info.
- Sanity check for mmio address modified.
- Sanity check added for stepping and substepping.
- Modified the intel_dmc_info structure, cache only the required header info. (Animesh)
v13: clarify firmware load error message.
The reason for a firmware loading failure can be obscure if the driver
is built-in. Provide an explanation to the user about the likely reason for
the failure and how to resolve it. (Imre)
v14: Suggested by Jani.
- fix s/I915/CONFIG_DRM_I915/ typo
- add fw_path to the firmware object instead of using a static ptr (Jani)
v15:
1) Changed the firmware name as dmc_gen9.bin, everytime for a new firmware version a symbolic link
with same name will help not to build kernel again.
2) Changes done as per review comments from Imre.
- Error check removed for intel_csr_ucode_init.
- Moved csr-specific data structure to intel_csr.h and optimization done on structure definition.
- fw->data used directly for parsing the header info & memory allocation
only done separately for payload. (Animesh)
v16:
- No need for out_regs label in i915_driver_load(), so removed it.
- Changed the firmware name as skl_dmc_ver1.bin, followed naming convention <platform>_dmc_<api-version>.bin (Animesh)
Issue: VIZ-2569
Signed-off-by: A.Sunil Kamath <sunil.kamath@intel.com>
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-05-04 19:58:44 +07:00
|
|
|
const char *fw_path;
|
2015-08-03 23:25:32 +07:00
|
|
|
uint32_t *dmc_payload;
|
drm/i915/skl: Add support to load SKL CSR firmware.
Display Context Save and Restore support is needed for
various SKL Display C states like DC5, DC6.
This implementation is added based on first version of DMC CSR program
that we received from h/w team.
Here we are using request_firmware based design.
Finally this firmware should end up in linux-firmware tree.
For SKL platform its mandatory to ensure that we load this
csr program before enabling DC states like DC5/DC6.
As CSR program gets reset on various conditions, we should ensure
to load it during boot and in future change to be added to load
this system resume sequence too.
v1: Initial release as RFC patch
v2: Design change as per Daniel, Damien and Shobit's review comments
request firmware method followed.
v3: Some optimization and functional changes.
Pulled register defines into drivers/gpu/drm/i915/i915_reg.h
Used kmemdup to allocate and duplicate firmware content.
Ensured to free allocated buffer.
v4: Modified as per review comments from Satheesh and Daniel
Removed temporary buffer.
Optimized number of writes by replacing I915_WRITE with I915_WRITE64.
v5:
Modified as per review comments from Damien.
- Changed name for functions and firmware.
- Introduced HAS_CSR.
- Reverted back previous change and used csr_buf with u8 size.
- Using cpu_to_be64 for endianness change.
Modified as per review comments from Imre.
- Modified registers and macro names to be a bit closer to bspec terminology
and the existing register naming in the driver.
- Early return for non SKL platforms in intel_load_csr_program function.
- Added locking around CSR program load function as it may be called
concurrently during system/runtime resume.
- Releasing the fw before loading the program for consistency
- Handled error path during f/w load.
v6: Modified as per review comments from Imre.
- Corrected out_freecsr sequence.
v7: Modified as per review comments from Imre.
Fail loading fw if fw->size%8!=0.
v8: Rebase to latest.
v9: Rebase on top of -nightly (Damien)
v10: Enabled support for dmc firmware ver 1.0.
According to ver 1.0 in a single binary package all the firmware's that are
required for different stepping's of the product will be stored. The package
contains the css header, followed by the package header and the actual dmc
firmwares. Package header contains the firmware/stepping mapping table and
the corresponding firmware offsets to the individual binaries, within the
package. Each individual program binary contains the header and the payload
sections whose size is specified in the header section. These changes are done
to extract the specific firmware from the package. (Animesh)
v11: Modified as per review comments from Imre.
- Added code comment from bspec for header structure elements.
- Added __packed to avoid structure padding.
- Added helper functions for stepping and substepping info.
- Added code comment for CSR_MAX_FW_SIZE.
- Disabled BXT firmware loading, will be enabled with dmc 1.0 support.
- Changed skl_stepping_info based on bspec, earlier used from config DB.
- Removed duplicate call of cpu_to_be* from intel_csr_load_program function.
- Used cpu_to_be32 instead of cpu_to_be64 as firmware binary in dword aligned.
- Added sanity check for header length.
- Added sanity check for mmio address got from firmware binary.
- kmalloc done separately for dmc header and dmc firmware. (Animesh)
v12: Modified as per review comments from Imre.
- Corrected the typo error in skl stepping info structure.
- Added out-of-bound access for skl_stepping_info.
- Sanity check for mmio address modified.
- Sanity check added for stepping and substepping.
- Modified the intel_dmc_info structure, cache only the required header info. (Animesh)
v13: clarify firmware load error message.
The reason for a firmware loading failure can be obscure if the driver
is built-in. Provide an explanation to the user about the likely reason for
the failure and how to resolve it. (Imre)
v14: Suggested by Jani.
- fix s/I915/CONFIG_DRM_I915/ typo
- add fw_path to the firmware object instead of using a static ptr (Jani)
v15:
1) Changed the firmware name as dmc_gen9.bin, everytime for a new firmware version a symbolic link
with same name will help not to build kernel again.
2) Changes done as per review comments from Imre.
- Error check removed for intel_csr_ucode_init.
- Moved csr-specific data structure to intel_csr.h and optimization done on structure definition.
- fw->data used directly for parsing the header info & memory allocation
only done separately for payload. (Animesh)
v16:
- No need for out_regs label in i915_driver_load(), so removed it.
- Changed the firmware name as skl_dmc_ver1.bin, followed naming convention <platform>_dmc_<api-version>.bin (Animesh)
Issue: VIZ-2569
Signed-off-by: A.Sunil Kamath <sunil.kamath@intel.com>
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-05-04 19:58:44 +07:00
|
|
|
uint32_t dmc_fw_size;
|
2015-10-27 19:46:59 +07:00
|
|
|
uint32_t version;
|
drm/i915/skl: Add support to load SKL CSR firmware.
Display Context Save and Restore support is needed for
various SKL Display C states like DC5, DC6.
This implementation is added based on first version of DMC CSR program
that we received from h/w team.
Here we are using request_firmware based design.
Finally this firmware should end up in linux-firmware tree.
For SKL platform its mandatory to ensure that we load this
csr program before enabling DC states like DC5/DC6.
As CSR program gets reset on various conditions, we should ensure
to load it during boot and in future change to be added to load
this system resume sequence too.
v1: Initial release as RFC patch
v2: Design change as per Daniel, Damien and Shobit's review comments
request firmware method followed.
v3: Some optimization and functional changes.
Pulled register defines into drivers/gpu/drm/i915/i915_reg.h
Used kmemdup to allocate and duplicate firmware content.
Ensured to free allocated buffer.
v4: Modified as per review comments from Satheesh and Daniel
Removed temporary buffer.
Optimized number of writes by replacing I915_WRITE with I915_WRITE64.
v5:
Modified as per review comments from Damien.
- Changed name for functions and firmware.
- Introduced HAS_CSR.
- Reverted back previous change and used csr_buf with u8 size.
- Using cpu_to_be64 for endianness change.
Modified as per review comments from Imre.
- Modified registers and macro names to be a bit closer to bspec terminology
and the existing register naming in the driver.
- Early return for non SKL platforms in intel_load_csr_program function.
- Added locking around CSR program load function as it may be called
concurrently during system/runtime resume.
- Releasing the fw before loading the program for consistency
- Handled error path during f/w load.
v6: Modified as per review comments from Imre.
- Corrected out_freecsr sequence.
v7: Modified as per review comments from Imre.
Fail loading fw if fw->size%8!=0.
v8: Rebase to latest.
v9: Rebase on top of -nightly (Damien)
v10: Enabled support for dmc firmware ver 1.0.
According to ver 1.0 in a single binary package all the firmware's that are
required for different stepping's of the product will be stored. The package
contains the css header, followed by the package header and the actual dmc
firmwares. Package header contains the firmware/stepping mapping table and
the corresponding firmware offsets to the individual binaries, within the
package. Each individual program binary contains the header and the payload
sections whose size is specified in the header section. These changes are done
to extract the specific firmware from the package. (Animesh)
v11: Modified as per review comments from Imre.
- Added code comment from bspec for header structure elements.
- Added __packed to avoid structure padding.
- Added helper functions for stepping and substepping info.
- Added code comment for CSR_MAX_FW_SIZE.
- Disabled BXT firmware loading, will be enabled with dmc 1.0 support.
- Changed skl_stepping_info based on bspec, earlier used from config DB.
- Removed duplicate call of cpu_to_be* from intel_csr_load_program function.
- Used cpu_to_be32 instead of cpu_to_be64 as firmware binary in dword aligned.
- Added sanity check for header length.
- Added sanity check for mmio address got from firmware binary.
- kmalloc done separately for dmc header and dmc firmware. (Animesh)
v12: Modified as per review comments from Imre.
- Corrected the typo error in skl stepping info structure.
- Added out-of-bound access for skl_stepping_info.
- Sanity check for mmio address modified.
- Sanity check added for stepping and substepping.
- Modified the intel_dmc_info structure, cache only the required header info. (Animesh)
v13: clarify firmware load error message.
The reason for a firmware loading failure can be obscure if the driver
is built-in. Provide an explanation to the user about the likely reason for
the failure and how to resolve it. (Imre)
v14: Suggested by Jani.
- fix s/I915/CONFIG_DRM_I915/ typo
- add fw_path to the firmware object instead of using a static ptr (Jani)
v15:
1) Changed the firmware name as dmc_gen9.bin, everytime for a new firmware version a symbolic link
with same name will help not to build kernel again.
2) Changes done as per review comments from Imre.
- Error check removed for intel_csr_ucode_init.
- Moved csr-specific data structure to intel_csr.h and optimization done on structure definition.
- fw->data used directly for parsing the header info & memory allocation
only done separately for payload. (Animesh)
v16:
- No need for out_regs label in i915_driver_load(), so removed it.
- Changed the firmware name as skl_dmc_ver1.bin, followed naming convention <platform>_dmc_<api-version>.bin (Animesh)
Issue: VIZ-2569
Signed-off-by: A.Sunil Kamath <sunil.kamath@intel.com>
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-05-04 19:58:44 +07:00
|
|
|
uint32_t mmio_count;
|
drm/i915: Type safe register read/write
Make I915_READ and I915_WRITE more type safe by wrapping the register
offset in a struct. This should eliminate most of the fumbles we've had
with misplaced parens.
This only takes care of normal mmio registers. We could extend the idea
to other register types and define each with its own struct. That way
you wouldn't be able to accidentally pass the wrong thing to a specific
register access function.
The gpio_reg setup is probably the ugliest thing left. But I figure I'd
just leave it for now, and wait for some divine inspiration to strike
before making it nice.
As for the generated code, it's actually a bit better sometimes. Eg.
looking at i915_irq_handler(), we can see the following change:
lea 0x70024(%rdx,%rax,1),%r9d
mov $0x1,%edx
- movslq %r9d,%r9
- mov %r9,%rsi
- mov %r9,-0x58(%rbp)
- callq *0xd8(%rbx)
+ mov %r9d,%esi
+ mov %r9d,-0x48(%rbp)
callq *0xd8(%rbx)
So previously gcc thought the register offset might be signed and
decided to sign extend it, just in case. The rest appears to be
mostly just minor shuffling of instructions.
v2: i915_mmio_reg_{offset,equal,valid}() helpers added
s/_REG/_MMIO/ in the register defines
no more switch statements left to worry about
ring_emit stuff got sorted in a prep patch
cmd parser, lrc context and w/a batch buildup also in prep patch
vgpu stuff cleaned up and moved to a prep patch
all other unrelated changes split out
v3: Rebased due to BXT DSI/BLC, MOCS, etc.
v4: Rebased due to churn, s/i915_mmio_reg_t/i915_reg_t/
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/1447853606-2751-1-git-send-email-ville.syrjala@linux.intel.com
2015-11-18 20:33:26 +07:00
|
|
|
i915_reg_t mmioaddr[8];
|
drm/i915/skl: Add support to load SKL CSR firmware.
Display Context Save and Restore support is needed for
various SKL Display C states like DC5, DC6.
This implementation is added based on first version of DMC CSR program
that we received from h/w team.
Here we are using request_firmware based design.
Finally this firmware should end up in linux-firmware tree.
For SKL platform its mandatory to ensure that we load this
csr program before enabling DC states like DC5/DC6.
As CSR program gets reset on various conditions, we should ensure
to load it during boot and in future change to be added to load
this system resume sequence too.
v1: Initial release as RFC patch
v2: Design change as per Daniel, Damien and Shobit's review comments
request firmware method followed.
v3: Some optimization and functional changes.
Pulled register defines into drivers/gpu/drm/i915/i915_reg.h
Used kmemdup to allocate and duplicate firmware content.
Ensured to free allocated buffer.
v4: Modified as per review comments from Satheesh and Daniel
Removed temporary buffer.
Optimized number of writes by replacing I915_WRITE with I915_WRITE64.
v5:
Modified as per review comments from Damien.
- Changed name for functions and firmware.
- Introduced HAS_CSR.
- Reverted back previous change and used csr_buf with u8 size.
- Using cpu_to_be64 for endianness change.
Modified as per review comments from Imre.
- Modified registers and macro names to be a bit closer to bspec terminology
and the existing register naming in the driver.
- Early return for non SKL platforms in intel_load_csr_program function.
- Added locking around CSR program load function as it may be called
concurrently during system/runtime resume.
- Releasing the fw before loading the program for consistency
- Handled error path during f/w load.
v6: Modified as per review comments from Imre.
- Corrected out_freecsr sequence.
v7: Modified as per review comments from Imre.
Fail loading fw if fw->size%8!=0.
v8: Rebase to latest.
v9: Rebase on top of -nightly (Damien)
v10: Enabled support for dmc firmware ver 1.0.
According to ver 1.0 in a single binary package all the firmware's that are
required for different stepping's of the product will be stored. The package
contains the css header, followed by the package header and the actual dmc
firmwares. Package header contains the firmware/stepping mapping table and
the corresponding firmware offsets to the individual binaries, within the
package. Each individual program binary contains the header and the payload
sections whose size is specified in the header section. These changes are done
to extract the specific firmware from the package. (Animesh)
v11: Modified as per review comments from Imre.
- Added code comment from bspec for header structure elements.
- Added __packed to avoid structure padding.
- Added helper functions for stepping and substepping info.
- Added code comment for CSR_MAX_FW_SIZE.
- Disabled BXT firmware loading, will be enabled with dmc 1.0 support.
- Changed skl_stepping_info based on bspec, earlier used from config DB.
- Removed duplicate call of cpu_to_be* from intel_csr_load_program function.
- Used cpu_to_be32 instead of cpu_to_be64 as firmware binary is dword aligned.
- Added sanity check for header length.
- Added sanity check for mmio address got from firmware binary.
- kmalloc done separately for dmc header and dmc firmware. (Animesh)
v12: Modified as per review comments from Imre.
- Corrected the typo error in skl stepping info structure.
- Added out-of-bound access for skl_stepping_info.
- Sanity check for mmio address modified.
- Sanity check added for stepping and substepping.
- Modified the intel_dmc_info structure, cache only the required header info. (Animesh)
v13: clarify firmware load error message.
The reason for a firmware loading failure can be obscure if the driver
is built-in. Provide an explanation to the user about the likely reason for
the failure and how to resolve it. (Imre)
v14: Suggested by Jani.
- fix s/I915/CONFIG_DRM_I915/ typo
- add fw_path to the firmware object instead of using a static ptr (Jani)
v15:
1) Changed the firmware name as dmc_gen9.bin, everytime for a new firmware version a symbolic link
with same name will help not to build kernel again.
2) Changes done as per review comments from Imre.
- Error check removed for intel_csr_ucode_init.
- Moved csr-specific data structure to intel_csr.h and optimization done on structure definition.
- fw->data used directly for parsing the header info & memory allocation
only done separately for payload. (Animesh)
v16:
- No need for out_regs label in i915_driver_load(), so removed it.
- Changed the firmware name as skl_dmc_ver1.bin, followed naming convention <platform>_dmc_<api-version>.bin (Animesh)
Issue: VIZ-2569
Signed-off-by: A.Sunil Kamath <sunil.kamath@intel.com>
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-05-04 19:58:44 +07:00
|
|
|
uint32_t mmiodata[8];
|
2016-02-18 22:21:11 +07:00
|
|
|
uint32_t dc_state;
|
2016-03-01 03:49:03 +07:00
|
|
|
uint32_t allowed_dc_mask;
|
drm/i915/skl: Add support to load SKL CSR firmware.
Display Context Save and Restore support is needed for
various SKL Display C states like DC5, DC6.
This implementation is added based on first version of DMC CSR program
that we received from h/w team.
Here we are using request_firmware based design.
Finally this firmware should end up in linux-firmware tree.
For SKL platform it's mandatory to ensure that we load this
csr program before enabling DC states like DC5/DC6.
As CSR program gets reset on various conditions, we should ensure
to load it during boot and in future change to be added to load
this system resume sequence too.
v1: Initial release as RFC patch
v2: Design change as per Daniel, Damien and Shobit's review comments
request firmware method followed.
v3: Some optimization and functional changes.
Pulled register defines into drivers/gpu/drm/i915/i915_reg.h
Used kmemdup to allocate and duplicate firmware content.
Ensured to free allocated buffer.
v4: Modified as per review comments from Satheesh and Daniel
Removed temporary buffer.
Optimized number of writes by replacing I915_WRITE with I915_WRITE64.
v5:
Modified as per review comments from Damien.
- Changed name for functions and firmware.
- Introduced HAS_CSR.
- Reverted back previous change and used csr_buf with u8 size.
- Using cpu_to_be64 for endianness change.
Modified as per review comments from Imre.
- Modified registers and macro names to be a bit closer to bspec terminology
and the existing register naming in the driver.
- Early return for non SKL platforms in intel_load_csr_program function.
- Added locking around CSR program load function as it may be called
concurrently during system/runtime resume.
- Releasing the fw before loading the program for consistency
- Handled error path during f/w load.
v6: Modified as per review comments from Imre.
- Corrected out_freecsr sequence.
v7: Modified as per review comments from Imre.
Fail loading fw if fw->size%8!=0.
v8: Rebase to latest.
v9: Rebase on top of -nightly (Damien)
v10: Enabled support for dmc firmware ver 1.0.
According to ver 1.0 in a single binary package all the firmware's that are
required for different stepping's of the product will be stored. The package
contains the css header, followed by the package header and the actual dmc
firmwares. Package header contains the firmware/stepping mapping table and
the corresponding firmware offsets to the individual binaries, within the
package. Each individual program binary contains the header and the payload
sections whose size is specified in the header section. These changes are done
to extract the specific firmware from the package. (Animesh)
v11: Modified as per review comments from Imre.
- Added code comment from bspec for header structure elements.
- Added __packed to avoid structure padding.
- Added helper functions for stepping and substepping info.
- Added code comment for CSR_MAX_FW_SIZE.
- Disabled BXT firmware loading, will be enabled with dmc 1.0 support.
- Changed skl_stepping_info based on bspec, earlier used from config DB.
- Removed duplicate call of cpu_to_be* from intel_csr_load_program function.
- Used cpu_to_be32 instead of cpu_to_be64 as firmware binary is dword aligned.
- Added sanity check for header length.
- Added sanity check for mmio address got from firmware binary.
- kmalloc done separately for dmc header and dmc firmware. (Animesh)
v12: Modified as per review comments from Imre.
- Corrected the typo error in skl stepping info structure.
- Added out-of-bound access for skl_stepping_info.
- Sanity check for mmio address modified.
- Sanity check added for stepping and substepping.
- Modified the intel_dmc_info structure, cache only the required header info. (Animesh)
v13: clarify firmware load error message.
The reason for a firmware loading failure can be obscure if the driver
is built-in. Provide an explanation to the user about the likely reason for
the failure and how to resolve it. (Imre)
v14: Suggested by Jani.
- fix s/I915/CONFIG_DRM_I915/ typo
- add fw_path to the firmware object instead of using a static ptr (Jani)
v15:
1) Changed the firmware name as dmc_gen9.bin, everytime for a new firmware version a symbolic link
with same name will help not to build kernel again.
2) Changes done as per review comments from Imre.
- Error check removed for intel_csr_ucode_init.
- Moved csr-specific data structure to intel_csr.h and optimization done on structure definition.
- fw->data used directly for parsing the header info & memory allocation
only done separately for payload. (Animesh)
v16:
- No need for out_regs label in i915_driver_load(), so removed it.
- Changed the firmware name as skl_dmc_ver1.bin, followed naming convention <platform>_dmc_<api-version>.bin (Animesh)
Issue: VIZ-2569
Signed-off-by: A.Sunil Kamath <sunil.kamath@intel.com>
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-05-04 19:58:44 +07:00
|
|
|
};
|
|
|
|
|
2013-01-25 05:44:55 +07:00
|
|
|
enum i915_cache_level {
|
|
|
|
I915_CACHE_NONE = 0,
|
2013-08-06 19:17:02 +07:00
|
|
|
I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */
|
|
|
|
I915_CACHE_L3_LLC, /* gen7+, L3 sits between the domain specifc
|
|
|
|
caches, eg sampler/render caches, and the
|
|
|
|
large Last-Level-Cache. LLC is coherent with
|
|
|
|
the CPU, but L3 is only visible to the GPU. */
|
2013-08-08 20:41:10 +07:00
|
|
|
I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
|
2013-01-25 05:44:55 +07:00
|
|
|
};
|
|
|
|
|
2016-12-05 21:29:36 +07:00
|
|
|
#define I915_COLOR_UNEVICTABLE (-1) /* a non-vma sharing the address space */
|
|
|
|
|
2015-02-14 02:23:44 +07:00
|
|
|
enum fb_op_origin {
|
|
|
|
ORIGIN_GTT,
|
|
|
|
ORIGIN_CPU,
|
|
|
|
ORIGIN_CS,
|
|
|
|
ORIGIN_FLIP,
|
2015-07-15 02:29:14 +07:00
|
|
|
ORIGIN_DIRTYFB,
|
2015-02-14 02:23:44 +07:00
|
|
|
};
|
|
|
|
|
2016-01-12 02:44:36 +07:00
|
|
|
struct intel_fbc {
|
2015-07-03 05:25:10 +07:00
|
|
|
/* This is always the inner lock when overlapping with struct_mutex and
|
|
|
|
* it's the outer lock when overlapping with stolen_lock. */
|
|
|
|
struct mutex lock;
|
2014-07-01 00:41:24 +07:00
|
|
|
unsigned threshold;
|
2015-02-14 02:23:46 +07:00
|
|
|
unsigned int possible_framebuffer_bits;
|
|
|
|
unsigned int busy_bits;
|
2016-01-19 20:35:48 +07:00
|
|
|
unsigned int visible_pipes_mask;
|
2015-02-09 23:46:29 +07:00
|
|
|
struct intel_crtc *crtc;
|
2013-06-28 06:30:21 +07:00
|
|
|
|
2014-06-20 02:06:10 +07:00
|
|
|
struct drm_mm_node compressed_fb;
|
2013-06-28 06:30:21 +07:00
|
|
|
struct drm_mm_node *compressed_llb;
|
|
|
|
|
2014-08-01 16:04:45 +07:00
|
|
|
bool false_color;
|
|
|
|
|
2015-10-15 20:44:46 +07:00
|
|
|
bool enabled;
|
2015-10-15 03:45:36 +07:00
|
|
|
bool active;
|
2018-06-25 23:37:57 +07:00
|
|
|
bool flip_pending;
|
2014-09-20 02:04:55 +07:00
|
|
|
|
2016-09-13 20:38:57 +07:00
|
|
|
bool underrun_detected;
|
|
|
|
struct work_struct underrun_work;
|
|
|
|
|
2017-07-15 02:38:22 +07:00
|
|
|
/*
|
|
|
|
* Due to the atomic rules we can't access some structures without the
|
|
|
|
* appropriate locking, so we cache information here in order to avoid
|
|
|
|
* these problems.
|
|
|
|
*/
|
2016-01-19 20:35:42 +07:00
|
|
|
struct intel_fbc_state_cache {
|
2017-01-16 22:21:27 +07:00
|
|
|
struct i915_vma *vma;
|
2018-02-20 20:42:08 +07:00
|
|
|
unsigned long flags;
|
2017-01-16 22:21:27 +07:00
|
|
|
|
2016-01-19 20:35:42 +07:00
|
|
|
struct {
|
|
|
|
unsigned int mode_flags;
|
|
|
|
uint32_t hsw_bdw_pixel_rate;
|
|
|
|
} crtc;
|
|
|
|
|
|
|
|
struct {
|
|
|
|
unsigned int rotation;
|
|
|
|
int src_w;
|
|
|
|
int src_h;
|
|
|
|
bool visible;
|
2017-10-18 03:08:07 +07:00
|
|
|
/*
|
|
|
|
* Display surface base address adjustment for
|
|
|
|
* pageflips. Note that on gen4+ this only adjusts up
|
|
|
|
* to a tile, offsets within a tile are handled in
|
|
|
|
* the hw itself (with the TILEOFF register).
|
|
|
|
*/
|
|
|
|
int adjusted_x;
|
|
|
|
int adjusted_y;
|
2017-10-18 03:08:11 +07:00
|
|
|
|
|
|
|
int y;
|
2016-01-19 20:35:42 +07:00
|
|
|
} plane;
|
|
|
|
|
|
|
|
struct {
|
2016-11-19 02:53:04 +07:00
|
|
|
const struct drm_format_info *format;
|
2016-01-19 20:35:42 +07:00
|
|
|
unsigned int stride;
|
|
|
|
} fb;
|
|
|
|
} state_cache;
|
|
|
|
|
2017-07-15 02:38:22 +07:00
|
|
|
/*
|
|
|
|
* This structure contains everything that's relevant to program the
|
|
|
|
* hardware registers. When we want to figure out if we need to disable
|
|
|
|
* and re-enable FBC for a new configuration we just check if there's
|
|
|
|
* something different in the struct. The genx_fbc_activate functions
|
|
|
|
* are supposed to read from it in order to program the registers.
|
|
|
|
*/
|
2015-12-24 03:28:11 +07:00
|
|
|
struct intel_fbc_reg_params {
|
2017-01-16 22:21:27 +07:00
|
|
|
struct i915_vma *vma;
|
2018-02-20 20:42:08 +07:00
|
|
|
unsigned long flags;
|
2017-01-16 22:21:27 +07:00
|
|
|
|
2015-12-24 03:28:11 +07:00
|
|
|
struct {
|
|
|
|
enum pipe pipe;
|
2017-11-18 02:19:10 +07:00
|
|
|
enum i9xx_plane_id i9xx_plane;
|
2015-12-24 03:28:11 +07:00
|
|
|
unsigned int fence_y_offset;
|
|
|
|
} crtc;
|
|
|
|
|
|
|
|
struct {
|
2016-11-19 02:53:04 +07:00
|
|
|
const struct drm_format_info *format;
|
2015-12-24 03:28:11 +07:00
|
|
|
unsigned int stride;
|
|
|
|
} fb;
|
|
|
|
|
|
|
|
int cfb_size;
|
2017-08-11 01:30:33 +07:00
|
|
|
unsigned int gen9_wa_cfb_stride;
|
2015-12-24 03:28:11 +07:00
|
|
|
} params;
|
|
|
|
|
2015-10-27 23:50:03 +07:00
|
|
|
const char *no_fbc_reason;
|
2010-02-06 03:42:41 +07:00
|
|
|
};
|
|
|
|
|
2016-12-31 18:20:12 +07:00
|
|
|
/*
|
2015-01-10 03:55:56 +07:00
|
|
|
* HIGH_RR is the highest eDP panel refresh rate read from EDID
|
|
|
|
* LOW_RR is the lowest eDP panel refresh rate found from EDID
|
|
|
|
* parsing for same resolution.
|
|
|
|
*/
|
|
|
|
enum drrs_refresh_rate_type {
|
|
|
|
DRRS_HIGH_RR,
|
|
|
|
DRRS_LOW_RR,
|
|
|
|
DRRS_MAX_RR, /* RR count */
|
|
|
|
};
|
|
|
|
|
|
|
|
enum drrs_support_type {
|
|
|
|
DRRS_NOT_SUPPORTED = 0,
|
|
|
|
STATIC_DRRS_SUPPORT = 1,
|
|
|
|
SEAMLESS_DRRS_SUPPORT = 2
|
2014-04-05 13:43:28 +07:00
|
|
|
};
|
|
|
|
|
2014-07-12 00:30:11 +07:00
|
|
|
struct intel_dp;
|
2015-01-10 03:55:56 +07:00
|
|
|
struct i915_drrs {
|
|
|
|
struct mutex mutex;
|
|
|
|
struct delayed_work work;
|
|
|
|
struct intel_dp *dp;
|
|
|
|
unsigned busy_frontbuffer_bits;
|
|
|
|
enum drrs_refresh_rate_type refresh_rate_type;
|
|
|
|
enum drrs_support_type type;
|
|
|
|
};
|
|
|
|
|
2013-10-04 02:15:06 +07:00
|
|
|
struct i915_psr {
|
2014-07-12 00:30:15 +07:00
|
|
|
struct mutex lock;
|
2018-08-09 21:21:01 +07:00
|
|
|
|
|
|
|
#define I915_PSR_DEBUG_MODE_MASK 0x0f
|
|
|
|
#define I915_PSR_DEBUG_DEFAULT 0x00
|
|
|
|
#define I915_PSR_DEBUG_DISABLE 0x01
|
|
|
|
#define I915_PSR_DEBUG_ENABLE 0x02
|
2018-08-08 21:19:11 +07:00
|
|
|
#define I915_PSR_DEBUG_FORCE_PSR1 0x03
|
2018-08-09 21:21:01 +07:00
|
|
|
#define I915_PSR_DEBUG_IRQ 0x10
|
|
|
|
|
|
|
|
u32 debug;
|
2013-10-04 02:15:06 +07:00
|
|
|
bool sink_support;
|
2018-08-09 21:21:01 +07:00
|
|
|
bool prepared, enabled;
|
|
|
|
struct intel_dp *dp;
|
2014-06-13 19:10:03 +07:00
|
|
|
bool active;
|
2018-06-14 02:26:00 +07:00
|
|
|
struct work_struct work;
|
2014-07-12 00:30:16 +07:00
|
|
|
unsigned busy_frontbuffer_bits;
|
2018-03-29 05:30:42 +07:00
|
|
|
bool sink_psr2_support;
|
2016-02-02 03:02:07 +07:00
|
|
|
bool link_standby;
|
2017-01-02 18:30:55 +07:00
|
|
|
bool colorimetry_support;
|
2017-01-02 18:30:58 +07:00
|
|
|
bool alpm;
|
2018-03-29 05:30:42 +07:00
|
|
|
bool psr2_enabled;
|
2018-03-29 05:30:44 +07:00
|
|
|
u8 sink_sync_latency;
|
2018-04-04 04:24:20 +07:00
|
|
|
ktime_t last_entry_attempt;
|
|
|
|
ktime_t last_exit;
|
2013-07-12 04:45:00 +07:00
|
|
|
};
|
2013-06-28 06:30:21 +07:00
|
|
|
|
2010-04-07 15:15:53 +07:00
|
|
|
enum intel_pch {
|
2012-07-04 04:48:16 +07:00
|
|
|
PCH_NONE = 0, /* No PCH present */
|
2010-04-07 15:15:53 +07:00
|
|
|
PCH_IBX, /* Ibexpeak PCH */
|
2017-06-20 20:03:08 +07:00
|
|
|
PCH_CPT, /* Cougarpoint/Pantherpoint PCH */
|
|
|
|
PCH_LPT, /* Lynxpoint/Wildcatpoint PCH */
|
2014-04-09 12:38:57 +07:00
|
|
|
PCH_SPT, /* Sunrisepoint PCH */
|
2017-08-01 01:52:20 +07:00
|
|
|
PCH_KBP, /* Kaby Lake PCH */
|
|
|
|
PCH_CNP, /* Cannon Lake PCH */
|
2018-01-12 01:00:05 +07:00
|
|
|
PCH_ICP, /* Ice Lake PCH */
|
2018-06-08 19:33:27 +07:00
|
|
|
PCH_NOP, /* PCH without south display */
|
2010-04-07 15:15:53 +07:00
|
|
|
};
|
|
|
|
|
2012-12-01 21:04:24 +07:00
|
|
|
enum intel_sbi_destination {
|
|
|
|
SBI_ICLK,
|
|
|
|
SBI_MPHY,
|
|
|
|
};
|
|
|
|
|
2011-07-13 04:56:22 +07:00
|
|
|
#define QUIRK_LVDS_SSC_DISABLE (1<<1)
|
2012-03-15 21:56:26 +07:00
|
|
|
#define QUIRK_INVERT_BRIGHTNESS (1<<2)
|
2014-07-04 06:27:50 +07:00
|
|
|
#define QUIRK_BACKLIGHT_PRESENT (1<<3)
|
2014-11-20 15:26:30 +07:00
|
|
|
#define QUIRK_PIN_SWIZZLED_PAGES (1<<5)
|
2017-06-30 23:33:48 +07:00
|
|
|
#define QUIRK_INCREASE_T12_DELAY (1<<6)
|
2018-07-11 03:02:05 +07:00
|
|
|
#define QUIRK_INCREASE_DDI_DISABLED_TIME (1<<7)
|
2010-07-20 03:53:12 +07:00
|
|
|
|
2010-03-30 12:34:14 +07:00
|
|
|
struct intel_fbdev;
|
2011-07-08 18:22:42 +07:00
|
|
|
struct intel_fbc_work;
|
2010-03-30 12:34:13 +07:00
|
|
|
|
2012-02-15 04:37:19 +07:00
|
|
|
struct intel_gmbus {
|
|
|
|
struct i2c_adapter adapter;
|
2016-03-07 22:56:59 +07:00
|
|
|
#define GMBUS_FORCE_BIT_RETRY (1U << 31)
|
2012-11-10 22:58:21 +07:00
|
|
|
u32 force_bit;
|
2012-02-15 04:37:19 +07:00
|
|
|
u32 reg0;
|
drm/i915: Type safe register read/write
Make I915_READ and I915_WRITE more type safe by wrapping the register
offset in a struct. This should eliminate most of the fumbles we've had
with misplaced parens.
This only takes care of normal mmio registers. We could extend the idea
to other register types and define each with its own struct. That way
you wouldn't be able to accidentally pass the wrong thing to a specific
register access function.
The gpio_reg setup is probably the ugliest thing left. But I figure I'd
just leave it for now, and wait for some divine inspiration to strike
before making it nice.
As for the generated code, it's actually a bit better sometimes. Eg.
looking at i915_irq_handler(), we can see the following change:
lea 0x70024(%rdx,%rax,1),%r9d
mov $0x1,%edx
- movslq %r9d,%r9
- mov %r9,%rsi
- mov %r9,-0x58(%rbp)
- callq *0xd8(%rbx)
+ mov %r9d,%esi
+ mov %r9d,-0x48(%rbp)
callq *0xd8(%rbx)
So previously gcc thought the register offset might be signed and
decided to sign extend it, just in case. The rest appears to be
mostly just minor shuffling of instructions.
v2: i915_mmio_reg_{offset,equal,valid}() helpers added
s/_REG/_MMIO/ in the register defines
no more switch statements left to worry about
ring_emit stuff got sorted in a prep patch
cmd parser, lrc context and w/a batch buildup also in prep patch
vgpu stuff cleaned up and moved to a prep patch
all other unrelated changes split out
v3: Rebased due to BXT DSI/BLC, MOCS, etc.
v4: Rebased due to churn, s/i915_mmio_reg_t/i915_reg_t/
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/1447853606-2751-1-git-send-email-ville.syrjala@linux.intel.com
2015-11-18 20:33:26 +07:00
|
|
|
i915_reg_t gpio_reg;
|
2012-02-28 06:43:09 +07:00
|
|
|
struct i2c_algo_bit_data bit_algo;
|
2012-02-15 04:37:19 +07:00
|
|
|
struct drm_i915_private *dev_priv;
|
|
|
|
};
|
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
struct i915_suspend_saved_registers {
|
2008-05-07 09:27:53 +07:00
|
|
|
u32 saveDSPARB;
|
2007-11-22 11:14:14 +07:00
|
|
|
u32 saveFBC_CONTROL;
|
2008-02-17 10:19:29 +07:00
|
|
|
u32 saveCACHE_MODE_0;
|
|
|
|
u32 saveMI_ARB_STATE;
|
2007-11-22 11:14:14 +07:00
|
|
|
u32 saveSWF0[16];
|
|
|
|
u32 saveSWF1[16];
|
2015-09-19 00:03:43 +07:00
|
|
|
u32 saveSWF3[3];
|
2011-10-10 02:52:02 +07:00
|
|
|
uint64_t saveFENCE[I915_MAX_NUM_FENCES];
|
2011-07-27 03:53:06 +07:00
|
|
|
u32 savePCH_PORT_HOTPLUG;
|
2014-12-11 03:16:05 +07:00
|
|
|
u16 saveGCDGMBUS;
|
2012-11-03 01:55:02 +07:00
|
|
|
};
|
2012-11-03 01:55:03 +07:00
|
|
|
|
2014-05-05 19:19:56 +07:00
|
|
|
struct vlv_s0ix_state {
|
|
|
|
/* GAM */
|
|
|
|
u32 wr_watermark;
|
|
|
|
u32 gfx_prio_ctrl;
|
|
|
|
u32 arb_mode;
|
|
|
|
u32 gfx_pend_tlb0;
|
|
|
|
u32 gfx_pend_tlb1;
|
|
|
|
u32 lra_limits[GEN7_LRA_LIMITS_REG_NUM];
|
|
|
|
u32 media_max_req_count;
|
|
|
|
u32 gfx_max_req_count;
|
|
|
|
u32 render_hwsp;
|
|
|
|
u32 ecochk;
|
|
|
|
u32 bsd_hwsp;
|
|
|
|
u32 blt_hwsp;
|
|
|
|
u32 tlb_rd_addr;
|
|
|
|
|
|
|
|
/* MBC */
|
|
|
|
u32 g3dctl;
|
|
|
|
u32 gsckgctl;
|
|
|
|
u32 mbctl;
|
|
|
|
|
|
|
|
/* GCP */
|
|
|
|
u32 ucgctl1;
|
|
|
|
u32 ucgctl3;
|
|
|
|
u32 rcgctl1;
|
|
|
|
u32 rcgctl2;
|
|
|
|
u32 rstctl;
|
|
|
|
u32 misccpctl;
|
|
|
|
|
|
|
|
/* GPM */
|
|
|
|
u32 gfxpause;
|
|
|
|
u32 rpdeuhwtc;
|
|
|
|
u32 rpdeuc;
|
|
|
|
u32 ecobus;
|
|
|
|
u32 pwrdwnupctl;
|
|
|
|
u32 rp_down_timeout;
|
|
|
|
u32 rp_deucsw;
|
|
|
|
u32 rcubmabdtmr;
|
|
|
|
u32 rcedata;
|
|
|
|
u32 spare2gh;
|
|
|
|
|
|
|
|
/* Display 1 CZ domain */
|
|
|
|
u32 gt_imr;
|
|
|
|
u32 gt_ier;
|
|
|
|
u32 pm_imr;
|
|
|
|
u32 pm_ier;
|
|
|
|
u32 gt_scratch[GEN7_GT_SCRATCH_REG_NUM];
|
|
|
|
|
|
|
|
/* GT SA CZ domain */
|
|
|
|
u32 tilectl;
|
|
|
|
u32 gt_fifoctl;
|
|
|
|
u32 gtlc_wake_ctrl;
|
|
|
|
u32 gtlc_survive;
|
|
|
|
u32 pmwgicz;
|
|
|
|
|
|
|
|
/* Display 2 CZ domain */
|
|
|
|
u32 gu_ctl0;
|
|
|
|
u32 gu_ctl1;
|
2015-04-02 04:22:57 +07:00
|
|
|
u32 pcbr;
|
2014-05-05 19:19:56 +07:00
|
|
|
u32 clock_gate_dis2;
|
|
|
|
};
|
|
|
|
|
2014-07-11 02:31:18 +07:00
|
|
|
struct intel_rps_ei {
|
2017-03-15 22:43:03 +07:00
|
|
|
ktime_t ktime;
|
2014-07-11 02:31:18 +07:00
|
|
|
u32 render_c0;
|
|
|
|
u32 media_c0;
|
2014-07-04 04:33:01 +07:00
|
|
|
};
|
|
|
|
|
2017-10-11 04:30:06 +07:00
|
|
|
struct intel_rps {
|
drm/i915: sanitize rps irq disabling
When disabling the RPS interrupts there is a tricky dependency between
the thread disabling the interrupts, the RPS interrupt handler and the
corresponding RPS work. The RPS work can reenable the interrupts, so
there is no straightforward order in the disabling thread to (1) make
sure that any RPS work is flushed and to (2) disable all RPS
interrupts. Currently this is solved by masking the interrupts using two
separate mask registers (first level display IMR and PM IMR) and doing
the disabling when all first level interrupts are disabled.
This works, but the requirement to run with all first level interrupts
disabled is unnecessary making the suspend / unload time ordering of RPS
disabling wrt. other unitialization steps difficult and error prone.
Removing this restriction allows us to disable RPS early during suspend
/ unload and forget about it for the rest of the sequence. By adding a
more explicit method for avoiding the above race, it also becomes easier
to prove its correctness. Finally currently we can hit the WARN in
snb_update_pm_irq(), when a final RPS work runs with the first level
interrupts already disabled. This won't lead to any problem (due to the
separate interrupt masks), but with the change in this and the next
patch we can get rid of the WARN, while leaving it in place for other
scenarios.
To address the above points, add a new RPS interrupts_enabled flag and
use this during RPS disabling to avoid requeuing the RPS work and
reenabling of the RPS interrupts. Since the interrupt disabling happens
now in intel_suspend_gt_powersave(), we will disable RPS interrupts
explicitly during suspend (and not just through the first level mask),
but there is no problem doing so, it's also more consistent and allows
us to unify more of the RPS disabling during suspend and unload time in
the next patch.
v2/v3:
- rebase on patch "drm/i915: move rps irq disable one level up" in the
patchset
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-11-19 20:30:04 +07:00
|
|
|
/*
|
|
|
|
* work, interrupts_enabled and pm_iir are protected by
|
|
|
|
* dev_priv->irq_lock
|
|
|
|
*/
|
2012-11-03 01:55:03 +07:00
|
|
|
struct work_struct work;
|
drm/i915: sanitize rps irq disabling
When disabling the RPS interrupts there is a tricky dependency between
the thread disabling the interrupts, the RPS interrupt handler and the
corresponding RPS work. The RPS work can reenable the interrupts, so
there is no straightforward order in the disabling thread to (1) make
sure that any RPS work is flushed and to (2) disable all RPS
interrupts. Currently this is solved by masking the interrupts using two
separate mask registers (first level display IMR and PM IMR) and doing
the disabling when all first level interrupts are disabled.
This works, but the requirement to run with all first level interrupts
disabled is unnecessary making the suspend / unload time ordering of RPS
disabling wrt. other unitialization steps difficult and error prone.
Removing this restriction allows us to disable RPS early during suspend
/ unload and forget about it for the rest of the sequence. By adding a
more explicit method for avoiding the above race, it also becomes easier
to prove its correctness. Finally currently we can hit the WARN in
snb_update_pm_irq(), when a final RPS work runs with the first level
interrupts already disabled. This won't lead to any problem (due to the
separate interrupt masks), but with the change in this and the next
patch we can get rid of the WARN, while leaving it in place for other
scenarios.
To address the above points, add a new RPS interrupts_enabled flag and
use this during RPS disabling to avoid requeuing the RPS work and
reenabling of the RPS interrupts. Since the interrupt disabling happens
now in intel_suspend_gt_powersave(), we will disable RPS interrupts
explicitly during suspend (and not just through the first level mask),
but there is no problem doing so, it's also more consistent and allows
us to unify more of the RPS disabling during suspend and unload time in
the next patch.
v2/v3:
- rebase on patch "drm/i915: move rps irq disable one level up" in the
patchset
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-11-19 20:30:04 +07:00
|
|
|
bool interrupts_enabled;
|
2012-11-03 01:55:03 +07:00
|
|
|
u32 pm_iir;
|
2013-07-05 04:35:28 +07:00
|
|
|
|
2016-09-13 03:19:35 +07:00
|
|
|
/* PM interrupt bits that should never be masked */
|
2017-03-11 09:37:00 +07:00
|
|
|
u32 pm_intrmsk_mbz;
|
2016-05-31 15:28:27 +07:00
|
|
|
|
2014-03-20 08:31:11 +07:00
|
|
|
/* Frequencies are stored in potentially platform dependent multiples.
|
|
|
|
* In other words, *_freq needs to be multiplied by X to be interesting.
|
|
|
|
* Soft limits are those which are used for the dynamic reclocking done
|
|
|
|
* by the driver (raise frequencies under heavy loads, and lower for
|
|
|
|
* lighter loads). Hard limits are those imposed by the hardware.
|
|
|
|
*
|
|
|
|
* A distinction is made for overclocking, which is never enabled by
|
|
|
|
* default, and is considered to be above the hard limit if it's
|
|
|
|
* possible at all.
|
|
|
|
*/
|
|
|
|
u8 cur_freq; /* Current frequency (cached, may not == HW) */
|
|
|
|
u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */
|
|
|
|
u8 max_freq_softlimit; /* Max frequency permitted by the driver */
|
|
|
|
u8 max_freq; /* Maximum frequency, RP0 if not overclocking */
|
|
|
|
u8 min_freq; /* AKA RPn. Minimum frequency */
|
2016-07-13 15:10:35 +07:00
|
|
|
u8 boost_freq; /* Frequency to request when wait boosting */
|
2015-03-18 16:48:21 +07:00
|
|
|
u8 idle_freq; /* Frequency to request when we are idle */
|
2014-03-20 08:31:11 +07:00
|
|
|
u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */
|
|
|
|
u8 rp1_freq; /* "less than" RP0 power/freqency */
|
|
|
|
u8 rp0_freq; /* Non-overclocked max frequency. */
|
2016-03-05 02:43:02 +07:00
|
|
|
u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */
|
2012-11-03 01:14:00 +07:00
|
|
|
|
drm/i915: Tweak RPS thresholds to more aggressively downclock
After applying wait-boost we often find ourselves stuck at higher clocks
than required. The current threshold value requires the GPU to be
continuously and completely idle for 313ms before it is dropped by one
bin. Conversely, we require the GPU to be busy for an average of 90% over
a 84ms period before we upclock. So the current thresholds almost never
downclock the GPU, and respond very slowly to sudden demands for more
power. It is easy to observe that we currently lock into the wrong bin
and both underperform in benchmarks and consume more power than optimal
(just by repeating the task and measuring the different results).
An alternative approach, as discussed in the bspec, is to use a
continuous threshold for upclocking, and an average value for downclocking.
This is good for quickly detecting and reacting to state changes within a
frame, however it fails with the common throttling method of waiting
upon the outstanding frame - at least it is difficult to choose a
threshold that works well at 15,000fps and at 60fps. So continue to use
average busy/idle loads to determine frequency change.
v2: Use 3 power zones to keep frequencies low in steady-state mostly
idle (e.g. scrolling, interactive 2D drawing), and frequencies high
for demanding games. In between those end-states, we use a
fast-reclocking algorithm to converge more quickly on the desired bin.
v3: Bug fixes - make sure we reset adj after switching power zones.
v4: Tune - drop the continuous busy thresholds as it prevents us from
choosing the right frequency for glxgears style swap benchmarks. Instead
the goal is to be able to find the right clocks irrespective of the
wait-boost.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: Owen Taylor <otaylor@redhat.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-25 23:34:57 +07:00
|
|
|
int last_adj;
|
drm/i915: Interactive RPS mode
RPS provides a feedback loop where we use the load during the previous
evaluation interval to decide whether to up or down clock the GPU
frequency. Our responsiveness is split into 3 regimes, a high and low
plateau with the intent to keep the gpu clocked high to cover occasional
stalls under high load, and low despite occasional glitches under steady
low load, and inbetween. However, we run into situations like kodi where
we want to stay at low power (video decoding is done efficiently
inside the fixed function HW and doesn't need high clocks even for high
bitrate streams), but just occasionally the pipeline is more complex
than a video decode and we need a smidgen of extra GPU power to present
on time. In the high power regime, we sample at sub frame intervals with
a bias to upclocking, and conversely at low power we sample over a few
frames worth to provide what we consider to be the right levels of
responsiveness respectively. At low power, we more or less expect to be
kicked out to high power at the start of a busy sequence by waitboosting.
Prior to commit e9af4ea2b9e7 ("drm/i915: Avoid waitboosting on the active
request") whenever we missed the frame or stalled, we would immediately go
full throttle and upclock the GPU to max. But in commit e9af4ea2b9e7, we
relaxed the waitboosting to only apply if the pipeline was deep to avoid
over-committing resources for a near miss. Sadly though, a near miss is
still a miss, and perceptible as jitter in the frame delivery.
To try and prevent the near miss before having to resort to boosting
after the fact, we use the pageflip queue as an indication that we are
in an "interactive" regime and so should sample the load more frequently
to provide power before the frame misses its vblank. This will make us
more favorable to providing a small power increase (one or two bins) as
required rather than going all the way to maximum and then having to
work back down again. (We still keep the waitboosting mechanism around
just in case a dramatic change in system load requires urgent upclocking,
faster than we can provide in a few evaluation intervals.)
v2: Reduce rps_set_interactive to a boolean parameter to avoid the
confusion of what if they wanted a new power mode after pinning to a
different mode (which to choose?)
v3: Only reprogram RPS while the GT is awake, it will be set when we
wake the GT, and while off warns about being used outside of rpm.
v4: Fix deferred application of interactive mode
v5: s/state/interactive/
v6: Group the mutex with its principal in a substruct
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107111
Fixes: e9af4ea2b9e7 ("drm/i915: Avoid waitboosting on the active request")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180731132629.3381-1-chris@chris-wilson.co.uk
2018-07-31 20:26:29 +07:00
|
|
|
|
|
|
|
struct {
|
|
|
|
struct mutex mutex;
|
|
|
|
|
|
|
|
enum { LOW_POWER, BETWEEN, HIGH_POWER } mode;
|
|
|
|
unsigned int interactive;
|
|
|
|
|
|
|
|
u8 up_threshold; /* Current %busy required to uplock */
|
|
|
|
u8 down_threshold; /* Current %busy required to downclock */
|
|
|
|
} power;
|
drm/i915: Tweak RPS thresholds to more aggressively downclock
After applying wait-boost we often find ourselves stuck at higher clocks
than required. The current threshold value requires the GPU to be
continuously and completely idle for 313ms before it is dropped by one
bin. Conversely, we require the GPU to be busy for an average of 90% over
a 84ms period before we upclock. So the current thresholds almost never
downclock the GPU, and respond very slowly to sudden demands for more
power. It is easy to observe that we currently lock into the wrong bin
and both underperform in benchmarks and consume more power than optimal
(just by repeating the task and measuring the different results).
An alternative approach, as discussed in the bspec, is to use a
continuous threshold for upclocking, and an average value for downclocking.
This is good for quickly detecting and reacting to state changes within a
frame, however it fails with the common throttling method of waiting
upon the outstanding frame - at least it is difficult to choose a
threshold that works well at 15,000fps and at 60fps. So continue to use
average busy/idle loads to determine frequency change.
v2: Use 3 power zones to keep frequencies low in steady-state mostly
idle (e.g. scrolling, interactive 2D drawing), and frequencies high
for demanding games. In between those end-states, we use a
fast-reclocking algorithm to converge more quickly on the desired bin.
v3: Bug fixes - make sure we reset adj after switching power zones.
v4: Tune - drop the continuous busy thresholds as it prevents us from
choosing the right frequency for glxgears style swap benchmarks. Instead
the goal is to be able to find the right clocks irrespective of the
wait-boost.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Stéphane Marchesin <stephane.marchesin@gmail.com>
Cc: Owen Taylor <otaylor@redhat.com>
Cc: "Meng, Mengmeng" <mengmeng.meng@intel.com>
Cc: "Zhuang, Lena" <lena.zhuang@intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-09-25 23:34:57 +07:00
|
|
|
|
2013-10-11 03:58:50 +07:00
|
|
|
bool enabled;
|
2017-06-28 19:35:48 +07:00
|
|
|
atomic_t num_waiters;
|
|
|
|
atomic_t boosts;
|
2012-11-03 01:14:01 +07:00
|
|
|
|
2014-07-11 02:31:18 +07:00
|
|
|
/* manual wa residency calculations */
|
2017-03-10 04:12:30 +07:00
|
|
|
struct intel_rps_ei ei;
|
2012-11-03 01:55:03 +07:00
|
|
|
};
|
|
|
|
|
2017-10-11 04:30:10 +07:00
|
|
|
struct intel_rc6 {
|
|
|
|
bool enabled;
|
2018-02-08 23:00:36 +07:00
|
|
|
u64 prev_hw_residency[4];
|
|
|
|
u64 cur_residency[4];
|
2017-10-11 04:30:10 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
/* LLC p-state bookkeeping; currently only an enable flag. */
struct intel_llc_pstate {
	bool enabled;
};
|
|
|
|
|
2017-10-11 04:30:06 +07:00
|
|
|
struct intel_gen6_power_mgmt {
|
|
|
|
struct intel_rps rps;
|
2017-10-11 04:30:10 +07:00
|
|
|
struct intel_rc6 rc6;
|
|
|
|
struct intel_llc_pstate llc_pstate;
|
2017-10-11 04:30:06 +07:00
|
|
|
};
|
|
|
|
|
2012-11-30 04:18:51 +07:00
|
|
|
/* defined intel_pm.c */
|
|
|
|
extern spinlock_t mchdev_lock;
|
|
|
|
|
2012-11-03 01:55:03 +07:00
|
|
|
struct intel_ilk_power_mgmt {
|
|
|
|
u8 cur_delay;
|
|
|
|
u8 min_delay;
|
|
|
|
u8 max_delay;
|
|
|
|
u8 fmax;
|
|
|
|
u8 fstart;
|
|
|
|
|
|
|
|
u64 last_count1;
|
|
|
|
unsigned long last_time1;
|
|
|
|
unsigned long chipset_power;
|
|
|
|
u64 last_count2;
|
2014-07-17 04:05:06 +07:00
|
|
|
u64 last_time2;
|
2012-11-03 01:55:03 +07:00
|
|
|
unsigned long gfx_power;
|
|
|
|
u8 corr;
|
|
|
|
|
|
|
|
int c_m;
|
|
|
|
int r_t;
|
|
|
|
};
|
|
|
|
|
2014-03-05 00:22:55 +07:00
|
|
|
struct drm_i915_private;
struct i915_power_well;

/*
 * Per-power-well hardware callbacks. Every hook receives the device and
 * the power well it should operate on.
 */
struct i915_power_well_ops {
	/*
	 * Synchronize the well's hw state to match the current sw state, for
	 * example enable/disable it based on the current refcount. Called
	 * during driver init and resume time, possibly after first calling
	 * the enable/disable handlers.
	 */
	void (*sync_hw)(struct drm_i915_private *dev_priv,
			struct i915_power_well *power_well);
	/*
	 * Enable the well and resources that depend on it (for example
	 * interrupts located on the well). Called after the 0->1 refcount
	 * transition.
	 */
	void (*enable)(struct drm_i915_private *dev_priv,
		       struct i915_power_well *power_well);
	/*
	 * Disable the well and resources that depend on it. Called after
	 * the 1->0 refcount transition.
	 */
	void (*disable)(struct drm_i915_private *dev_priv,
			struct i915_power_well *power_well);
	/* Returns the hw enabled state. */
	bool (*is_enabled)(struct drm_i915_private *dev_priv,
			   struct i915_power_well *power_well);
};
|
|
|
|
|
2018-08-06 16:58:39 +07:00
|
|
|
struct i915_power_well_regs {
|
|
|
|
i915_reg_t bios;
|
|
|
|
i915_reg_t driver;
|
|
|
|
i915_reg_t kvmr;
|
|
|
|
i915_reg_t debug;
|
|
|
|
};
|
|
|
|
|
2013-05-30 21:07:11 +07:00
|
|
|
/* Power well structure for haswell */
|
2018-08-06 16:58:37 +07:00
|
|
|
struct i915_power_well_desc {
|
2013-11-25 22:15:29 +07:00
|
|
|
const char *name;
|
2013-11-25 22:15:30 +07:00
|
|
|
bool always_on;
|
2017-02-09 16:31:21 +07:00
|
|
|
u64 domains;
|
2016-10-06 23:22:14 +07:00
|
|
|
/* unique identifier for this power well */
|
2017-07-12 03:42:30 +07:00
|
|
|
enum i915_power_well_id id;
|
2016-10-06 23:22:15 +07:00
|
|
|
/*
|
|
|
|
* Arbitraty data associated with this power well. Platform and power
|
|
|
|
* well specific.
|
|
|
|
*/
|
2017-07-06 21:40:29 +07:00
|
|
|
union {
|
2018-08-06 16:58:38 +07:00
|
|
|
struct {
|
|
|
|
/*
|
|
|
|
* request/status flag index in the PUNIT power well
|
|
|
|
* control/status registers.
|
|
|
|
*/
|
|
|
|
u8 idx;
|
|
|
|
} vlv;
|
2017-07-06 21:40:29 +07:00
|
|
|
struct {
|
|
|
|
enum dpio_phy phy;
|
|
|
|
} bxt;
|
2017-07-12 22:54:13 +07:00
|
|
|
struct {
|
2018-08-06 16:58:39 +07:00
|
|
|
const struct i915_power_well_regs *regs;
|
|
|
|
/*
|
|
|
|
* request/status flag index in the power well
|
|
|
|
* constrol/status registers.
|
|
|
|
*/
|
|
|
|
u8 idx;
|
2017-07-12 22:54:13 +07:00
|
|
|
/* Mask of pipes whose IRQ logic is backed by the pw */
|
|
|
|
u8 irq_pipe_mask;
|
|
|
|
/* The pw is backing the VGA functionality */
|
|
|
|
bool has_vga:1;
|
2017-07-12 03:42:35 +07:00
|
|
|
bool has_fuses:1;
|
2017-07-12 22:54:13 +07:00
|
|
|
} hsw;
|
2017-07-06 21:40:29 +07:00
|
|
|
};
|
2014-03-05 00:22:55 +07:00
|
|
|
const struct i915_power_well_ops *ops;
|
2013-05-30 21:07:11 +07:00
|
|
|
};
|
|
|
|
|
2018-08-06 16:58:37 +07:00
|
|
|
/* Runtime state of one power well, paired with its constant descriptor. */
struct i915_power_well {
	const struct i915_power_well_desc *desc;
	/* power well enable/disable usage count */
	int count;
	/* cached hw enabled state */
	bool hw_enabled;
};
|
|
|
|
|
2013-10-25 21:36:47 +07:00
|
|
|
struct i915_power_domains {
|
2013-10-25 21:36:48 +07:00
|
|
|
/*
|
|
|
|
* Power wells needed for initialization at driver init and suspend
|
|
|
|
* time are on. They are kept on until after the first modeset.
|
|
|
|
*/
|
2014-04-25 17:19:05 +07:00
|
|
|
bool initializing;
|
2018-08-16 19:37:57 +07:00
|
|
|
bool display_core_suspended;
|
2013-11-25 22:15:29 +07:00
|
|
|
int power_well_count;
|
2013-10-25 21:36:48 +07:00
|
|
|
|
2013-10-25 21:36:47 +07:00
|
|
|
struct mutex lock;
|
2013-11-25 22:15:35 +07:00
|
|
|
int domain_use_count[POWER_DOMAIN_NUM];
|
2013-11-25 22:15:29 +07:00
|
|
|
struct i915_power_well *power_wells;
|
2013-10-25 21:36:47 +07:00
|
|
|
};
|
|
|
|
|
2013-09-20 01:13:41 +07:00
|
|
|
#define MAX_L3_SLICES 2
|
2012-11-03 01:55:07 +07:00
|
|
|
struct intel_l3_parity {
|
2013-09-20 01:13:41 +07:00
|
|
|
u32 *remap_info[MAX_L3_SLICES];
|
2012-11-03 01:55:07 +07:00
|
|
|
struct work_struct error_work;
|
2013-09-20 01:13:41 +07:00
|
|
|
int which_slice;
|
2012-11-03 01:55:07 +07:00
|
|
|
};
|
|
|
|
|
2012-11-14 23:14:03 +07:00
|
|
|
struct i915_gem_mm {
|
|
|
|
/** Memory allocator for GTT stolen memory */
|
|
|
|
struct drm_mm stolen;
|
2015-07-03 05:25:09 +07:00
|
|
|
/** Protects the usage of the GTT stolen memory allocator. This is
|
|
|
|
* always the inner lock when overlapping with struct_mutex. */
|
|
|
|
struct mutex stolen_lock;
|
|
|
|
|
2017-10-16 18:40:37 +07:00
|
|
|
/* Protects bound_list/unbound_list and #drm_i915_gem_object.mm.link */
|
|
|
|
spinlock_t obj_lock;
|
|
|
|
|
2012-11-14 23:14:03 +07:00
|
|
|
/** List of all objects in gtt_space. Used to restore gtt
|
|
|
|
* mappings on resume */
|
|
|
|
struct list_head bound_list;
|
|
|
|
/**
|
|
|
|
* List of objects which are not bound to the GTT (thus
|
2016-10-28 19:58:42 +07:00
|
|
|
* are idle and not used by the GPU). These objects may or may
|
|
|
|
* not actually have any pages attached.
|
2012-11-14 23:14:03 +07:00
|
|
|
*/
|
|
|
|
struct list_head unbound_list;
|
|
|
|
|
2016-10-24 19:42:14 +07:00
|
|
|
/** List of all objects in gtt_space, currently mmaped by userspace.
|
|
|
|
* All objects within this list must also be on bound_list.
|
|
|
|
*/
|
|
|
|
struct list_head userfault_list;
|
|
|
|
|
2016-10-28 19:58:42 +07:00
|
|
|
/**
|
|
|
|
* List of objects which are pending destruction.
|
|
|
|
*/
|
|
|
|
struct llist_head free_list;
|
|
|
|
struct work_struct free_work;
|
2017-10-14 03:26:20 +07:00
|
|
|
spinlock_t free_lock;
|
2018-02-20 05:06:31 +07:00
|
|
|
/**
|
|
|
|
* Count of objects pending destructions. Used to skip needlessly
|
|
|
|
* waiting on an RCU barrier if no objects are waiting to be freed.
|
|
|
|
*/
|
|
|
|
atomic_t free_count;
|
2016-10-28 19:58:42 +07:00
|
|
|
|
2017-08-23 00:38:28 +07:00
|
|
|
/**
|
|
|
|
* Small stash of WC pages
|
|
|
|
*/
|
2018-07-05 01:55:18 +07:00
|
|
|
struct pagestash wc_stash;
|
2017-08-23 00:38:28 +07:00
|
|
|
|
2017-10-07 05:18:14 +07:00
|
|
|
/**
|
|
|
|
* tmpfs instance used for shmem backed objects
|
|
|
|
*/
|
|
|
|
struct vfsmount *gemfs;
|
|
|
|
|
2012-11-14 23:14:03 +07:00
|
|
|
/** PPGTT used for aliasing the PPGTT with the GTT */
|
|
|
|
struct i915_hw_ppgtt *aliasing_ppgtt;
|
|
|
|
|
2014-05-20 14:28:43 +07:00
|
|
|
struct notifier_block oom_notifier;
|
2016-04-04 20:46:43 +07:00
|
|
|
struct notifier_block vmap_notifier;
|
2014-03-25 20:23:04 +07:00
|
|
|
struct shrinker shrinker;
|
2012-11-14 23:14:03 +07:00
|
|
|
|
|
|
|
/** LRU list of objects with fence regs on them. */
|
|
|
|
struct list_head fence_list;
|
|
|
|
|
2017-06-16 21:05:22 +07:00
|
|
|
/**
|
|
|
|
* Workqueue to fault in userptr pages, flushed by the execbuf
|
|
|
|
* when required but otherwise left to userspace to try again
|
|
|
|
* on EAGAIN.
|
|
|
|
*/
|
|
|
|
struct workqueue_struct *userptr_wq;
|
|
|
|
|
2017-05-03 16:39:18 +07:00
|
|
|
u64 unordered_timeline;
|
|
|
|
|
2014-05-21 22:37:52 +07:00
|
|
|
/* the indicator for dispatch video commands on two BSD rings */
|
2016-09-01 18:58:21 +07:00
|
|
|
atomic_t bsd_engine_dispatch_index;
|
2014-05-21 22:37:52 +07:00
|
|
|
|
2012-11-14 23:14:03 +07:00
|
|
|
/** Bit 6 swizzling required for X tiling */
|
|
|
|
uint32_t bit_6_swizzle_x;
|
|
|
|
/** Bit 6 swizzling required for Y tiling */
|
|
|
|
uint32_t bit_6_swizzle_y;
|
|
|
|
|
|
|
|
/* accounting, useful for userland debugging */
|
2013-07-25 03:40:23 +07:00
|
|
|
spinlock_t object_stat_lock;
|
2016-10-18 19:02:48 +07:00
|
|
|
u64 object_memory;
|
2012-11-14 23:14:03 +07:00
|
|
|
u32 object_count;
|
|
|
|
};
|
|
|
|
|
2017-12-12 02:41:34 +07:00
|
|
|
/*
 * Driver timeouts. The idle-engines timeout is a plain millisecond count;
 * the others are written as multiples of HZ.
 */
#define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */

#define I915_RESET_TIMEOUT (10 * HZ) /* 10s */
#define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */

#define I915_ENGINE_DEAD_TIMEOUT  (4 * HZ)  /* Seqno, head and subunits dead */
#define I915_SEQNO_DEAD_TIMEOUT   (12 * HZ) /* Seqno dead with active head */

#define I915_ENGINE_WEDGED_TIMEOUT  (60 * HZ)  /* Reset but no recovery? */
|
|
|
|
|
2015-08-08 07:01:16 +07:00
|
|
|
/* DP AUX channel identifiers (note that A is 0x40, not the lowest value). */
#define DP_AUX_A 0x40
#define DP_AUX_B 0x10
#define DP_AUX_C 0x20
#define DP_AUX_D 0x30
#define DP_AUX_E 0x50
#define DP_AUX_F 0x60
|
2015-08-08 07:01:16 +07:00
|
|
|
|
2015-08-17 15:04:04 +07:00
|
|
|
/* DDC pin identifiers (values are not in port order). */
#define DDC_PIN_B  0x05
#define DDC_PIN_C  0x04
#define DDC_PIN_D  0x06
|
|
|
|
|
2013-09-13 03:06:24 +07:00
|
|
|
/* Per-DDI-port information taken from the VBT. */
struct ddi_vbt_port_info {
	int max_tmds_clock;

	/*
	 * This is an index in the HDMI/DVI DDI buffer translation table.
	 * The special value HDMI_LEVEL_SHIFT_UNKNOWN means the VBT didn't
	 * populate this field.
	 */
#define HDMI_LEVEL_SHIFT_UNKNOWN	0xff
	uint8_t hdmi_level_shift;

	uint8_t supports_dvi:1;
	uint8_t supports_hdmi:1;
	uint8_t supports_dp:1;
	uint8_t supports_edp:1;

	uint8_t alternate_aux_channel;
	uint8_t alternate_ddc_pin;

	uint8_t dp_boost_level;
	uint8_t hdmi_boost_level;
	int dp_max_link_rate;		/* 0 for not limited by VBT */
};
|
|
|
|
|
2014-11-14 23:52:30 +07:00
|
|
|
/* PSR "lines to wait" setting; the enumerators are indices, not line counts. */
enum psr_lines_to_wait {
	PSR_0_LINES_TO_WAIT = 0,
	PSR_1_LINE_TO_WAIT,
	PSR_4_LINES_TO_WAIT,
	PSR_8_LINES_TO_WAIT
};
|
|
|
|
|
2013-05-10 06:03:18 +07:00
|
|
|
struct intel_vbt_data {
|
|
|
|
struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */
|
|
|
|
struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */
|
|
|
|
|
|
|
|
/* Feature bits */
|
|
|
|
unsigned int int_tv_support:1;
|
|
|
|
unsigned int lvds_dither:1;
|
|
|
|
unsigned int int_crt_support:1;
|
|
|
|
unsigned int lvds_use_ssc:1;
|
2018-05-08 21:08:14 +07:00
|
|
|
unsigned int int_lvds_support:1;
|
2013-05-10 06:03:18 +07:00
|
|
|
unsigned int display_clock_mode:1;
|
|
|
|
unsigned int fdi_rx_polarity_inverted:1;
|
2016-04-08 20:28:12 +07:00
|
|
|
unsigned int panel_type:4;
|
2013-05-10 06:03:18 +07:00
|
|
|
int lvds_ssc_freq;
|
|
|
|
unsigned int bios_lvds_val; /* initial [PCH_]LVDS reg val in VBIOS */
|
|
|
|
|
2014-03-28 11:44:57 +07:00
|
|
|
enum drrs_support_type drrs_type;
|
|
|
|
|
2016-03-24 22:50:20 +07:00
|
|
|
struct {
|
|
|
|
int rate;
|
|
|
|
int lanes;
|
|
|
|
int preemphasis;
|
|
|
|
int vswing;
|
2016-03-24 22:50:21 +07:00
|
|
|
bool low_vswing;
|
2016-03-24 22:50:20 +07:00
|
|
|
bool initialized;
|
|
|
|
int bpp;
|
|
|
|
struct edp_power_seq pps;
|
|
|
|
} edp;
|
2013-05-10 06:03:18 +07:00
|
|
|
|
2014-11-14 23:52:30 +07:00
|
|
|
struct {
|
2018-05-09 07:35:24 +07:00
|
|
|
bool enable;
|
2014-11-14 23:52:30 +07:00
|
|
|
bool full_link;
|
|
|
|
bool require_aux_wakeup;
|
|
|
|
int idle_frames;
|
|
|
|
enum psr_lines_to_wait lines_to_wait;
|
2018-05-22 16:27:23 +07:00
|
|
|
int tp1_wakeup_time_us;
|
|
|
|
int tp2_tp3_wakeup_time_us;
|
2014-11-14 23:52:30 +07:00
|
|
|
} psr;
|
|
|
|
|
2013-12-15 05:38:29 +07:00
|
|
|
struct {
|
|
|
|
u16 pwm_freq_hz;
|
2014-04-09 15:22:06 +07:00
|
|
|
bool present;
|
2013-12-15 05:38:29 +07:00
|
|
|
bool active_low_pwm;
|
2014-06-24 22:27:39 +07:00
|
|
|
u8 min_brightness; /* min_brightness/255 of max */
|
2016-12-08 16:26:18 +07:00
|
|
|
u8 controller; /* brightness controller number */
|
2016-04-26 20:14:24 +07:00
|
|
|
enum intel_backlight_type type;
|
2013-12-15 05:38:29 +07:00
|
|
|
} backlight;
|
|
|
|
|
2013-08-27 19:12:25 +07:00
|
|
|
/* MIPI DSI */
|
|
|
|
struct {
|
|
|
|
u16 panel_id;
|
2014-04-14 12:30:34 +07:00
|
|
|
struct mipi_config *config;
|
|
|
|
struct mipi_pps_data *pps;
|
2017-10-13 19:44:59 +07:00
|
|
|
u16 bl_ports;
|
|
|
|
u16 cabc_ports;
|
2014-04-14 12:30:34 +07:00
|
|
|
u8 seq_version;
|
|
|
|
u32 size;
|
|
|
|
u8 *data;
|
2015-12-21 20:10:57 +07:00
|
|
|
const u8 *sequence[MIPI_SEQ_MAX];
|
drm/i915: Fix DSI panels with v1 MIPI sequences without a DEASSERT sequence v3
So far models of the Dell Venue 8 Pro, with a panel with MIPI panel
index = 3, one of which has been kindly provided to me by Jan Brummer,
where not working with the i915 driver, giving a black screen on the
first modeset.
The problem with at least these Dells is that their VBT defines a MIPI
ASSERT sequence, but not a DEASSERT sequence. Instead they DEASSERT the
reset in their INIT_OTP sequence, but the deassert must be done before
calling intel_dsi_device_ready(), so that is too late.
Simply doing the INIT_OTP sequence earlier is not enough to fix this,
because the INIT_OTP sequence also sends various MIPI packets to the
panel, which can only happen after calling intel_dsi_device_ready().
This commit fixes this by splitting the INIT_OTP sequence into everything
before the first DSI packet and everything else, including the first DSI
packet. The first part (everything before the first DSI packet) is then
used as deassert sequence.
Changed in v2:
-Split the init OTP sequence into a deassert reset and the actual init
OTP sequence, instead of calling it earlier and then having the first
mipi_exec_send_packet() call call intel_dsi_device_ready().
Changes in v3:
-Move the whole shebang to intel_bios.c
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82880
References: https://bugs.freedesktop.org/show_bug.cgi?id=101205
Cc: Jan-Michael Brummer <jan.brummer@tabos.org>
Reported-by: Jan-Michael Brummer <jan.brummer@tabos.org>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180214082151.25015-3-hdegoede@redhat.com
2018-02-14 15:21:51 +07:00
|
|
|
u8 *deassert_seq; /* Used by fixup_mipi_sequences() */
|
2013-08-27 19:12:25 +07:00
|
|
|
} dsi;
|
|
|
|
|
2013-05-10 06:03:18 +07:00
|
|
|
int crt_ddc_pin;
|
|
|
|
|
|
|
|
int child_dev_num;
|
2017-08-25 01:54:03 +07:00
|
|
|
struct child_device_config *child_dev;
|
2013-09-13 03:06:24 +07:00
|
|
|
|
|
|
|
struct ddi_vbt_port_info ddi_port_info[I915_MAX_PORTS];
|
2016-03-24 22:50:22 +07:00
|
|
|
struct sdvo_device_mapping sdvo_mappings[2];
|
2013-05-10 06:03:18 +07:00
|
|
|
};
|
|
|
|
|
2013-08-07 02:24:04 +07:00
|
|
|
/* DDB partitioning choices; the 5/6 split is only available on IVB+. */
enum intel_ddb_partitioning {
	INTEL_DDB_PART_1_2,
	INTEL_DDB_PART_5_6, /* IVB+ */
};
|
|
|
|
|
2013-08-07 02:24:05 +07:00
|
|
|
/* One watermark level: an enable flag and the pri/spr/cur/fbc values. */
struct intel_wm_level {
	bool enable;
	uint32_t pri_val;
	uint32_t spr_val;
	uint32_t cur_val;
	uint32_t fbc_val;
};
|
|
|
|
|
2013-12-17 19:46:36 +07:00
|
|
|
struct ilk_wm_values {
|
2013-10-09 23:18:03 +07:00
|
|
|
uint32_t wm_pipe[3];
|
|
|
|
uint32_t wm_lp[3];
|
|
|
|
uint32_t wm_lp_spr[3];
|
|
|
|
uint32_t wm_linetime[3];
|
|
|
|
bool enable_fbc_wm;
|
|
|
|
enum intel_ddb_partitioning partitioning;
|
|
|
|
};
|
|
|
|
|
2017-04-22 01:14:21 +07:00
|
|
|
struct g4x_pipe_wm {
|
2016-11-29 00:37:08 +07:00
|
|
|
uint16_t plane[I915_MAX_PLANES];
|
2017-04-22 01:14:29 +07:00
|
|
|
uint16_t fbc;
|
2015-06-25 02:00:04 +07:00
|
|
|
};
|
2015-03-06 02:19:49 +07:00
|
|
|
|
2017-04-22 01:14:21 +07:00
|
|
|
/* SR watermark set: plane, cursor and FBC values. */
struct g4x_sr_wm {
	uint16_t plane;
	uint16_t cursor;
	uint16_t fbc;
};
|
|
|
|
|
|
|
|
struct vlv_wm_ddl_values {
|
|
|
|
uint8_t plane[I915_MAX_PLANES];
|
2015-06-25 02:00:04 +07:00
|
|
|
};
|
2015-03-06 02:19:49 +07:00
|
|
|
|
2015-06-25 02:00:04 +07:00
|
|
|
struct vlv_wm_values {
|
2017-04-22 01:14:21 +07:00
|
|
|
struct g4x_pipe_wm pipe[3];
|
|
|
|
struct g4x_sr_wm sr;
|
2016-11-29 00:37:08 +07:00
|
|
|
struct vlv_wm_ddl_values ddl[3];
|
2015-06-25 02:00:03 +07:00
|
|
|
uint8_t level;
|
|
|
|
bool cxsr;
|
2015-03-06 02:19:45 +07:00
|
|
|
};
|
|
|
|
|
2017-04-22 01:14:29 +07:00
|
|
|
struct g4x_wm_values {
|
|
|
|
struct g4x_pipe_wm pipe[2];
|
|
|
|
struct g4x_sr_wm sr;
|
|
|
|
struct g4x_sr_wm hpll;
|
|
|
|
bool cxsr;
|
|
|
|
bool hpll_en;
|
|
|
|
bool fbc_en;
|
|
|
|
};
|
|
|
|
|
2014-11-05 00:06:41 +07:00
|
|
|
/* A half-open [start, end) range of DDB blocks. */
struct skl_ddb_entry {
	uint16_t start, end;	/* in number of blocks, 'end' is exclusive */
};

/* Returns the number of blocks covered by @entry (end - start). */
static inline uint16_t skl_ddb_entry_size(const struct skl_ddb_entry *entry)
{
	return entry->end - entry->start;
}

/* Returns true when @e1 and @e2 have the same start and the same end. */
static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1,
				       const struct skl_ddb_entry *e2)
{
	return e1->start == e2->start && e1->end == e2->end;
}
|
|
|
|
|
2014-11-05 00:06:41 +07:00
|
|
|
struct skl_ddb_allocation {
|
2018-04-09 10:41:01 +07:00
|
|
|
/* packed/y */
|
|
|
|
struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES];
|
|
|
|
struct skl_ddb_entry uv_plane[I915_MAX_PIPES][I915_MAX_PLANES];
|
2018-04-26 21:25:15 +07:00
|
|
|
u8 enabled_slices; /* GEN11 has configurable 2 slices */
|
2014-11-05 00:06:41 +07:00
|
|
|
};
|
|
|
|
|
2018-04-09 10:41:00 +07:00
|
|
|
struct skl_ddb_values {
|
2016-05-12 21:06:07 +07:00
|
|
|
unsigned dirty_pipes;
|
2014-11-05 00:06:41 +07:00
|
|
|
struct skl_ddb_allocation ddb;
|
2014-11-05 00:06:40 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
/* One SKL watermark level: plane enable plus the plane_res_b / plane_res_l values. */
struct skl_wm_level {
	bool plane_en;
	uint16_t plane_res_b;
	uint8_t plane_res_l;
};
|
|
|
|
|
2017-08-17 20:45:23 +07:00
|
|
|
/* Stores plane specific WM parameters */
|
|
|
|
struct skl_wm_params {
|
|
|
|
bool x_tiled, y_tiled;
|
|
|
|
bool rc_surface;
|
2018-04-09 10:41:04 +07:00
|
|
|
bool is_planar;
|
2017-08-17 20:45:23 +07:00
|
|
|
uint32_t width;
|
|
|
|
uint8_t cpp;
|
|
|
|
uint32_t plane_pixel_rate;
|
|
|
|
uint32_t y_min_scanlines;
|
|
|
|
uint32_t plane_bytes_per_line;
|
|
|
|
uint_fixed_16_16_t plane_blocks_per_line;
|
|
|
|
uint_fixed_16_16_t y_tile_minimum;
|
|
|
|
uint32_t linetime_us;
|
2018-01-30 20:49:11 +07:00
|
|
|
uint32_t dbuf_block_size;
|
2017-08-17 20:45:23 +07:00
|
|
|
};
|
|
|
|
|
2013-08-19 23:18:09 +07:00
|
|
|
/*
|
2014-03-08 06:08:18 +07:00
|
|
|
* This struct helps tracking the state needed for runtime PM, which puts the
|
|
|
|
* device in PCI D3 state. Notice that when this happens, nothing on the
|
|
|
|
* graphics device works, even register access, so we don't get interrupts nor
|
|
|
|
* anything else.
|
2013-08-19 23:18:09 +07:00
|
|
|
*
|
2014-03-08 06:08:18 +07:00
|
|
|
* Every piece of our code that needs to actually touch the hardware needs to
|
|
|
|
* either call intel_runtime_pm_get or call intel_display_power_get with the
|
|
|
|
* appropriate power domain.
|
drm/i915: make PC8 be part of runtime PM suspend/resume
Currently, when our driver becomes idle for i915.pc8_timeout (default:
5s) we enable PC8, so we save some power, but not everything we can.
Then, while PC8 is enabled, if we stay idle for more
autosuspend_delay_ms (default: 10s) we'll enter runtime PM and put the
graphics device in D3 state, saving even more power. The two features
are separate things with increasing levels of power savings, but if we
disable PC8 we'll never get into D3.
While from the modularity point of view it would be nice to keep these
features as separate, we have reasons to merge them:
- We are not aware of anybody wanting a "PC8 without D3" environment.
- If we keep both features as separate, we'll have to to test both
PC8 and PC8+D3 code paths. We're already having a major pain to
make QA do automated testing of just one thing, testing both paths
will cost even more.
- Only Haswell+ supports PC8, so if we want to add runtime PM support
to, for example, IVB, we'll have to copy some code from the PC8
feature to runtime PM, so merging both features as a single thing
will make it easier for enabling runtime PM on other platforms.
This patch only does the very basic steps required to have PC8 and
runtime PM merged on a single feature: the next patches will take care
of cleaning up everything.
v2: - Rebase.
v3: - Rebase.
- Fully remove the deprecated i915 params since Daniel doesn't
consider them as part of the ABI.
v4: - Rebase.
- Fix typo in the commit message.
v5: - Rebase, again.
- Add a huge comment explaining the different forcewake usage
(Chris, Daniel).
- Use open-coded forcewake functions (Daniel).
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-03-08 06:08:05 +07:00
|
|
|
*
|
2014-03-08 06:08:18 +07:00
|
|
|
* Our driver uses the autosuspend delay feature, which means we'll only really
|
|
|
|
* suspend if we stay with zero refcount for a certain amount of time. The
|
2014-09-30 15:56:39 +07:00
|
|
|
* default value is currently very conservative (see intel_runtime_pm_enable), but
|
2014-03-08 06:08:18 +07:00
|
|
|
* it can be changed with the standard runtime PM files from sysfs.
|
2013-08-19 23:18:09 +07:00
|
|
|
*
|
|
|
|
* The irqs_enabled variable becomes false exactly after we disable the IRQs and
|
|
|
|
* goes back to true exactly before we reenable the IRQs. We use this variable
|
|
|
|
* to check if someone is trying to enable/disable IRQs while they're supposed
|
|
|
|
* to be disabled. This shouldn't happen and we'll print some error messages in
|
2014-03-08 06:12:32 +07:00
|
|
|
* case it happens.
|
2013-08-19 23:18:09 +07:00
|
|
|
*
|
2014-03-08 06:08:18 +07:00
|
|
|
* For more, read the Documentation/power/runtime_pm.txt.
|
2013-08-19 23:18:09 +07:00
|
|
|
*/
|
2014-03-08 06:08:15 +07:00
|
|
|
struct i915_runtime_pm {
|
2015-12-16 07:52:19 +07:00
|
|
|
atomic_t wakeref_count;
|
2014-03-08 06:08:15 +07:00
|
|
|
bool suspended;
|
2014-09-30 15:56:43 +07:00
|
|
|
bool irqs_enabled;
|
2013-08-19 23:18:09 +07:00
|
|
|
};
|
|
|
|
|
2013-10-16 18:30:34 +07:00
|
|
|
/*
 * Pipe CRC sources. INTEL_PIPE_CRC_SOURCE_MAX is the number of sources
 * and must stay last.
 */
enum intel_pipe_crc_source {
	INTEL_PIPE_CRC_SOURCE_NONE,
	INTEL_PIPE_CRC_SOURCE_PLANE1,
	INTEL_PIPE_CRC_SOURCE_PLANE2,
	INTEL_PIPE_CRC_SOURCE_PF,
	INTEL_PIPE_CRC_SOURCE_PIPE,
	/* TV/DP on pre-gen5/vlv can't use the pipe source. */
	INTEL_PIPE_CRC_SOURCE_TV,
	INTEL_PIPE_CRC_SOURCE_DP_B,
	INTEL_PIPE_CRC_SOURCE_DP_C,
	INTEL_PIPE_CRC_SOURCE_DP_D,
	INTEL_PIPE_CRC_SOURCE_AUTO,
	INTEL_PIPE_CRC_SOURCE_MAX,
};
|
|
|
|
|
2013-10-16 00:55:29 +07:00
|
|
|
#define INTEL_PIPE_CRC_ENTRIES_NR 128
|
2013-10-16 00:55:27 +07:00
|
|
|
struct intel_pipe_crc {
|
2013-10-21 20:29:30 +07:00
|
|
|
spinlock_t lock;
|
2017-01-10 20:43:04 +07:00
|
|
|
int skipped;
|
2018-06-28 14:23:02 +07:00
|
|
|
enum intel_pipe_crc_source source;
|
2013-10-16 00:55:27 +07:00
|
|
|
};
|
|
|
|
|
drm/i915: Track frontbuffer invalidation/flushing
So these are the guts of the new beast. This tracks when a frontbuffer
gets invalidated (due to frontbuffer rendering) and hence should be
constantly scaned out, and when it's flushed again and can be
compressed/one-shot-upload.
Rules for flushing are simple: The frontbuffer needs one more full
upload starting from the next vblank. Which means that the flushing
can _only_ be called once the frontbuffer update has been latched.
But this poses a problem for pageflips: We can't just delay the
flushing until the pageflip is latched, since that would pose the risk
that we override frontbuffer rendering that has been scheduled
in-between the pageflip ioctl and the actual latching.
To handle this track asynchronous invalidations (and also pageflip)
state per-ring and delay any in-between flushing until the rendering
has completed. And also cancel any delayed flushing if we get a new
invalidation request (whether delayed or not).
Also call intel_mark_fb_busy in both cases in all cases to make sure
that we keep the screen at the highest refresh rate both on flips,
synchronous plane updates and for frontbuffer rendering.
v2: Lots of improvements
Suggestions from Chris:
- Move invalidate/flush in flush_*_domain and set_to_*_domain.
- Drop the flush in busy_ioctl since it's redundant. Was a leftover
from an earlier concept to track flips/delayed flushes.
- Don't forget about the initial modeset enable/final disable.
Suggested by Chris.
Track flips accurately, too. Since flips complete independently of
rendering we need to track pending flips in a separate mask. Again if
an invalidate happens we need to cancel the eventual flush to avoid
races.
v3:
Provide correct header declarations for flip functions. Currently not
needed outside of intel_display.c, but part of the proper interface.
v4: Add proper domain management to fbcon so that the fbcon buffer is
also tracked correctly.
v5: Fixup locking around the fbcon set_to_gtt_domain call.
v6: More comments from Chris:
- Split out fbcon changes.
- Drop superfluous checks for potential scanout before calling intel_fb
functions - we can micro-optimize this later.
- s/intel_fb_/intel_fb_obj_/ to make it clear that this deals in gem
object. We already have precedence for fb_obj in the pin_and_fence
functions.
v7: Clarify the semantics of the flip flush handling by renaming
things a bit:
- Don't go through a gem object but take the relevant frontbuffer bits
directly. These functions center on the plane, the actual object is
irrelevant - even a flip to the same object as already active should
cause a flush.
- Add a new intel_frontbuffer_flip for synchronous plane updates. It
currently just calls intel_frontbuffer_flush since the implementation
differs.
This way we achieve a clear split between one-shot update events on
one side and frontbuffer rendering with potentially a very long delay
between the invalidate and flush.
Chris and I also had some discussions about mark_busy and whether it
is appropriate to call from flush. But mark busy is a state which
should be derived from the 3 events (invalidate, flush, flip) we now
have by the users, like psr does by tracking relevant information in
psr.busy_frontbuffer_bits. DRRS (the only real use of mark_busy for
frontbuffer) needs to have similar logic. With that the overall
mark_busy in the core could be removed.
v8: Only when retiring gpu buffers only flush frontbuffer bits we
actually invalidated in a batch. Just for safety since before any
additional usage/invalidate we should always retire current rendering.
Suggested by Chris Wilson.
v9: Actually use intel_frontbuffer_flip in all appropriate places.
Spotted by Chris.
v10: Address more comments from Chris:
- Don't call _flip in set_base when the crtc is inactive, avoids redundancy
in the modeset case with the initial enabling of all planes.
- Add comments explaining that the initial/final plane enable/disable
still has work left to do before it's fully generic.
v11: Only invalidate for gtt/cpu access when writing. Spotted by Chris.
v12: s/_flush/_flip/ in intel_overlay.c per Chris' comment.
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-06-19 21:01:59 +07:00
|
|
|
struct i915_frontbuffer_tracking {
|
2016-08-04 22:32:36 +07:00
|
|
|
spinlock_t lock;
|
drm/i915: Track frontbuffer invalidation/flushing
So these are the guts of the new beast. This tracks when a frontbuffer
gets invalidated (due to frontbuffer rendering) and hence should be
constantly scaned out, and when it's flushed again and can be
compressed/one-shot-upload.
Rules for flushing are simple: The frontbuffer needs one more full
upload starting from the next vblank. Which means that the flushing
can _only_ be called once the frontbuffer update has been latched.
But this poses a problem for pageflips: We can't just delay the
flushing until the pageflip is latched, since that would pose the risk
that we override frontbuffer rendering that has been scheduled
in-between the pageflip ioctl and the actual latching.
To handle this track asynchronous invalidations (and also pageflip)
state per-ring and delay any in-between flushing until the rendering
has completed. And also cancel any delayed flushing if we get a new
invalidation request (whether delayed or not).
Also call intel_mark_fb_busy in both cases in all cases to make sure
that we keep the screen at the highest refresh rate both on flips,
synchronous plane updates and for frontbuffer rendering.
v2: Lots of improvements
Suggestions from Chris:
- Move invalidate/flush in flush_*_domain and set_to_*_domain.
- Drop the flush in busy_ioctl since it's redundant. Was a leftover
from an earlier concept to track flips/delayed flushes.
- Don't forget about the initial modeset enable/final disable.
Suggested by Chris.
Track flips accurately, too. Since flips complete independently of
rendering we need to track pending flips in a separate mask. Again if
an invalidate happens we need to cancel the evenutal flush to avoid
races.
v3:
Provide correct header declarations for flip functions. Currently not
needed outside of intel_display.c, but part of the proper interface.
v4: Add proper domain management to fbcon so that the fbcon buffer is
also tracked correctly.
v5: Fixup locking around the fbcon set_to_gtt_domain call.
v6: More comments from Chris:
- Split out fbcon changes.
- Drop superflous checks for potential scanout before calling intel_fb
functions - we can micro-optimize this later.
- s/intel_fb_/intel_fb_obj_/ to make it clear that this deals in gem
object. We already have precedence for fb_obj in the pin_and_fence
functions.
v7: Clarify the semantics of the flip flush handling by renaming
things a bit:
- Don't go through a gem object but take the relevant frontbuffer bits
directly. These functions center on the plane, the actual object is
irrelevant - even a flip to the same object as already active should
cause a flush.
- Add a new intel_frontbuffer_flip for synchronous plane updates. It
currently just calls intel_frontbuffer_flush since the implemenation
differs.
This way we achieve a clear split between one-shot update events on
one side and frontbuffer rendering with potentially a very long delay
between the invalidate and flush.
Chris and I also had some discussions about mark_busy and whether it
is appropriate to call from flush. But mark busy is a state which
should be derived from the 3 events (invalidate, flush, flip) we now
have by the users, like psr does by tracking relevant information in
psr.busy_frontbuffer_bits. DRRS (the only real use of mark_busy for
frontbuffer) needs to have similar logic. With that the overall
mark_busy in the core could be removed.
v8: Only when retiring gpu buffers only flush frontbuffer bits we
actually invalidated in a batch. Just for safety since before any
additional usage/invalidate we should always retire current rendering.
Suggested by Chris Wilson.
v9: Actually use intel_frontbuffer_flip in all appropriate places.
Spotted by Chris.
v10: Address more comments from Chris:
- Don't call _flip in set_base when the crtc is inactive, avoids redunancy
in the modeset case with the initial enabling of all planes.
- Add comments explaining that the initial/final plane enable/disable
still has work left to do before it's fully generic.
v11: Only invalidate for gtt/cpu access when writing. Spotted by Chris.
v12: s/_flush/_flip/ in intel_overlay.c per Chris' comment.
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-06-19 21:01:59 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Tracking bits for delayed frontbuffer flushing du to gpu activity or
|
|
|
|
* scheduled flips.
|
|
|
|
*/
|
|
|
|
unsigned busy_bits;
|
|
|
|
unsigned flip_bits;
|
|
|
|
};
|
|
|
|
|
2014-10-07 21:21:26 +07:00
|
|
|
struct i915_wa_reg {
|
2018-06-15 19:02:07 +07:00
|
|
|
u32 addr;
|
2014-10-07 21:21:26 +07:00
|
|
|
u32 value;
|
|
|
|
/* bitmask representing WA bits */
|
|
|
|
u32 mask;
|
|
|
|
};
|
|
|
|
|
2017-10-18 03:27:51 +07:00
|
|
|
/* Capacity of the reg[] array in struct i915_workarounds. */
#define I915_MAX_WA_REGS 16
|
2014-10-07 21:21:26 +07:00
|
|
|
|
|
|
|
struct i915_workarounds {
|
|
|
|
struct i915_wa_reg reg[I915_MAX_WA_REGS];
|
|
|
|
u32 count;
|
|
|
|
};
|
|
|
|
|
2015-02-10 18:05:47 +07:00
|
|
|
struct i915_virtual_gpu {
|
|
|
|
bool active;
|
2017-08-14 14:20:46 +07:00
|
|
|
u32 caps;
|
2015-02-10 18:05:47 +07:00
|
|
|
};
|
|
|
|
|
2015-09-25 05:53:18 +07:00
|
|
|
/*
 * used in computing the new watermarks state
 *
 * Summarises the display configuration as a count of active pipes plus two
 * flags describing sprite usage.
 */
struct intel_wm_config {
	/* Number of pipes currently active. */
	unsigned int num_pipes_active;
	/* True when sprites are enabled. */
	bool sprites_enabled;
	/* True when sprites are scaled. */
	bool sprites_scaled;
};
|
|
|
|
|
2016-11-08 02:49:52 +07:00
|
|
|
struct i915_oa_format {
|
|
|
|
u32 format;
|
|
|
|
int size;
|
|
|
|
};
|
|
|
|
|
2016-11-08 02:49:51 +07:00
|
|
|
struct i915_oa_reg {
|
|
|
|
i915_reg_t addr;
|
|
|
|
u32 value;
|
|
|
|
};
|
|
|
|
|
2017-08-03 23:58:08 +07:00
|
|
|
struct i915_oa_config {
|
|
|
|
char uuid[UUID_STRING_LEN + 1];
|
|
|
|
int id;
|
|
|
|
|
|
|
|
const struct i915_oa_reg *mux_regs;
|
|
|
|
u32 mux_regs_len;
|
|
|
|
const struct i915_oa_reg *b_counter_regs;
|
|
|
|
u32 b_counter_regs_len;
|
|
|
|
const struct i915_oa_reg *flex_regs;
|
|
|
|
u32 flex_regs_len;
|
|
|
|
|
|
|
|
struct attribute_group sysfs_metric;
|
|
|
|
struct attribute *attrs[2];
|
|
|
|
struct device_attribute sysfs_metric_id;
|
2017-08-04 00:05:50 +07:00
|
|
|
|
|
|
|
atomic_t ref_count;
|
2017-08-03 23:58:08 +07:00
|
|
|
};
|
|
|
|
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
/* Forward declaration: struct i915_perf_stream_ops takes the stream by pointer. */
struct i915_perf_stream;
|
|
|
|
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* struct i915_perf_stream_ops - the OPs to support a specific stream type
|
|
|
|
*/
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
struct i915_perf_stream_ops {
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* @enable: Enables the collection of HW samples, either in response to
|
|
|
|
* `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
|
|
|
|
* without `I915_PERF_FLAG_DISABLED`.
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
*/
|
|
|
|
void (*enable)(struct i915_perf_stream *stream);
|
|
|
|
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* @disable: Disables the collection of HW samples, either in response
|
|
|
|
* to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
|
|
|
|
* the stream.
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
*/
|
|
|
|
void (*disable)(struct i915_perf_stream *stream);
|
|
|
|
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* @poll_wait: Call poll_wait, passing a wait queue that will be woken
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
* once there is something ready to read() for the stream
|
|
|
|
*/
|
|
|
|
void (*poll_wait)(struct i915_perf_stream *stream,
|
|
|
|
struct file *file,
|
|
|
|
poll_table *wait);
|
|
|
|
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* @wait_unlocked: For handling a blocking read, wait until there is
|
|
|
|
* something to ready to read() for the stream. E.g. wait on the same
|
2016-11-08 02:49:52 +07:00
|
|
|
* wait queue that would be passed to poll_wait().
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
*/
|
|
|
|
int (*wait_unlocked)(struct i915_perf_stream *stream);
|
|
|
|
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* @read: Copy buffered metrics as records to userspace
|
|
|
|
* **buf**: the userspace, destination buffer
|
|
|
|
* **count**: the number of bytes to copy, requested by userspace
|
|
|
|
* **offset**: zero at the start of the read, updated as the read
|
|
|
|
* proceeds, it represents how many bytes have been copied so far and
|
|
|
|
* the buffer offset for copying the next record.
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
*
|
2016-12-08 04:40:33 +07:00
|
|
|
* Copy as many buffered i915 perf samples and records for this stream
|
|
|
|
* to userspace as will fit in the given buffer.
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
*
|
2016-12-08 04:40:33 +07:00
|
|
|
* Only write complete records; returning -%ENOSPC if there isn't room
|
|
|
|
* for a complete record.
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
*
|
2016-12-08 04:40:33 +07:00
|
|
|
* Return any error condition that results in a short read such as
|
|
|
|
* -%ENOSPC or -%EFAULT, even though these may be squashed before
|
|
|
|
* returning to userspace.
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
*/
|
|
|
|
int (*read)(struct i915_perf_stream *stream,
|
|
|
|
char __user *buf,
|
|
|
|
size_t count,
|
|
|
|
size_t *offset);
|
|
|
|
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* @destroy: Cleanup any stream specific resources.
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
*
|
|
|
|
* The stream will always be disabled before this is called.
|
|
|
|
*/
|
|
|
|
void (*destroy)(struct i915_perf_stream *stream);
|
|
|
|
};
|
|
|
|
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* struct i915_perf_stream - state for a single open stream FD
|
|
|
|
*/
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
struct i915_perf_stream {
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* @dev_priv: i915 drm device
|
|
|
|
*/
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
struct drm_i915_private *dev_priv;
|
|
|
|
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* @link: Links the stream into ``&drm_i915_private->streams``
|
|
|
|
*/
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
struct list_head link;
|
|
|
|
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
|
|
|
|
* properties given when opening a stream, representing the contents
|
|
|
|
* of a single sample as read() by userspace.
|
|
|
|
*/
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
u32 sample_flags;
|
2016-12-08 04:40:33 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @sample_size: Considering the configured contents of a sample
|
|
|
|
* combined with the required header size, this is the total size
|
|
|
|
* of a single sample record.
|
|
|
|
*/
|
2016-11-08 02:49:52 +07:00
|
|
|
int sample_size;
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* @ctx: %NULL if measuring system-wide across all contexts or a
|
|
|
|
* specific context that is being monitored.
|
|
|
|
*/
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
struct i915_gem_context *ctx;
|
2016-12-08 04:40:33 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @enabled: Whether the stream is currently enabled, considering
|
|
|
|
* whether the stream was opened in a disabled state and based
|
|
|
|
* on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
|
|
|
|
*/
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
bool enabled;
|
|
|
|
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* @ops: The callbacks providing the implementation of this specific
|
|
|
|
* type of configured stream.
|
|
|
|
*/
|
2016-11-08 02:49:52 +07:00
|
|
|
const struct i915_perf_stream_ops *ops;
|
2017-08-03 23:58:08 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @oa_config: The OA configuration used by the stream.
|
|
|
|
*/
|
|
|
|
struct i915_oa_config *oa_config;
|
2016-11-08 02:49:52 +07:00
|
|
|
};
|
|
|
|
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* struct i915_oa_ops - Gen specific implementation of an OA unit stream
|
|
|
|
*/
|
2016-11-08 02:49:52 +07:00
|
|
|
struct i915_oa_ops {
|
2017-08-04 00:05:50 +07:00
|
|
|
/**
|
|
|
|
* @is_valid_b_counter_reg: Validates register's address for
|
|
|
|
* programming boolean counters for a particular platform.
|
|
|
|
*/
|
|
|
|
bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv,
|
|
|
|
u32 addr);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @is_valid_mux_reg: Validates register's address for programming mux
|
|
|
|
* for a particular platform.
|
|
|
|
*/
|
|
|
|
bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @is_valid_flex_reg: Validates register's address for programming
|
|
|
|
* flex EU filtering for a particular platform.
|
|
|
|
*/
|
|
|
|
bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr);
|
|
|
|
|
2016-12-08 04:40:33 +07:00
|
|
|
/**
|
|
|
|
* @init_oa_buffer: Resets the head and tail pointers of the
|
|
|
|
* circular buffer for periodic OA reports.
|
|
|
|
*
|
|
|
|
* Called when first opening a stream for OA metrics, but also may be
|
|
|
|
* called in response to an OA buffer overflow or other error
|
|
|
|
* condition.
|
|
|
|
*
|
|
|
|
* Note it may be necessary to clear the full OA buffer here as part of
|
|
|
|
* maintaining the invariant that new reports must be written to
|
|
|
|
* zeroed memory for us to be able to reliably detect if an expected
|
|
|
|
* report has not yet landed in memory. (At least on Haswell the OA
|
|
|
|
* buffer tail pointer is not synchronized with reports being visible
|
|
|
|
* to the CPU)
|
|
|
|
*/
|
2016-11-08 02:49:52 +07:00
|
|
|
void (*init_oa_buffer)(struct drm_i915_private *dev_priv);
|
2016-12-08 04:40:33 +07:00
|
|
|
|
2017-06-13 18:23:03 +07:00
|
|
|
/**
|
|
|
|
* @enable_metric_set: Selects and applies any MUX configuration to set
|
|
|
|
* up the Boolean and Custom (B/C) counters that are part of the
|
|
|
|
* counter reports being sampled. May apply system constraints such as
|
2016-12-08 04:40:33 +07:00
|
|
|
* disabling EU clock gating as required.
|
|
|
|
*/
|
2017-08-03 23:58:08 +07:00
|
|
|
int (*enable_metric_set)(struct drm_i915_private *dev_priv,
|
|
|
|
const struct i915_oa_config *oa_config);
|
2016-12-08 04:40:33 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @disable_metric_set: Remove system constraints associated with using
|
|
|
|
* the OA unit.
|
|
|
|
*/
|
2016-11-08 02:49:52 +07:00
|
|
|
void (*disable_metric_set)(struct drm_i915_private *dev_priv);
|
2016-12-08 04:40:33 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @oa_enable: Enable periodic sampling
|
|
|
|
*/
|
2016-11-08 02:49:52 +07:00
|
|
|
void (*oa_enable)(struct drm_i915_private *dev_priv);
|
2016-12-08 04:40:33 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @oa_disable: Disable periodic sampling
|
|
|
|
*/
|
2016-11-08 02:49:52 +07:00
|
|
|
void (*oa_disable)(struct drm_i915_private *dev_priv);
|
2016-12-08 04:40:33 +07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @read: Copy data from the circular OA buffer into a given userspace
|
|
|
|
* buffer.
|
|
|
|
*/
|
2016-11-08 02:49:52 +07:00
|
|
|
int (*read)(struct i915_perf_stream *stream,
|
|
|
|
char __user *buf,
|
|
|
|
size_t count,
|
|
|
|
size_t *offset);
|
2016-12-08 04:40:33 +07:00
|
|
|
|
|
|
|
/**
|
2017-06-13 18:23:03 +07:00
|
|
|
* @oa_hw_tail_read: read the OA tail pointer register
|
2016-12-08 04:40:33 +07:00
|
|
|
*
|
2017-06-13 18:23:03 +07:00
|
|
|
* In particular this enables us to share all the fiddly code for
|
|
|
|
* handling the OA unit tail pointer race that affects multiple
|
|
|
|
* generations.
|
2016-12-08 04:40:33 +07:00
|
|
|
*/
|
2017-06-13 18:23:03 +07:00
|
|
|
u32 (*oa_hw_tail_read)(struct drm_i915_private *dev_priv);
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
};
|
|
|
|
|
2017-02-08 01:33:45 +07:00
|
|
|
struct intel_cdclk_state {
|
2018-01-18 00:25:08 +07:00
|
|
|
unsigned int cdclk, vco, ref, bypass;
|
2017-10-24 16:52:08 +07:00
|
|
|
u8 voltage_level;
|
2017-02-08 01:33:45 +07:00
|
|
|
};
|
|
|
|
|
2014-03-31 18:27:22 +07:00
|
|
|
struct drm_i915_private {
|
2016-06-24 20:00:18 +07:00
|
|
|
struct drm_device drm;
|
|
|
|
|
2015-04-07 22:20:57 +07:00
|
|
|
struct kmem_cache *objects;
|
2015-04-07 22:20:58 +07:00
|
|
|
struct kmem_cache *vmas;
|
2017-08-16 15:52:08 +07:00
|
|
|
struct kmem_cache *luts;
|
2015-04-07 22:20:57 +07:00
|
|
|
struct kmem_cache *requests;
|
2016-11-15 03:41:02 +07:00
|
|
|
struct kmem_cache *dependencies;
|
2017-05-17 19:10:04 +07:00
|
|
|
struct kmem_cache *priorities;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
2014-02-08 02:12:48 +07:00
|
|
|
const struct intel_device_info info;
|
2018-02-08 04:05:43 +07:00
|
|
|
struct intel_driver_caps caps;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
2017-12-11 22:18:18 +07:00
|
|
|
/**
|
|
|
|
* Data Stolen Memory - aka "i915 stolen memory" gives us the start and
|
|
|
|
* end of stolen which we can optionally use to create GEM objects
|
2017-12-11 22:18:21 +07:00
|
|
|
* backed by stolen memory. Note that stolen_usable_size tells us
|
2017-12-11 22:18:18 +07:00
|
|
|
* exactly how much of this we are actually allowed to use, given that
|
|
|
|
* some portion of it is in fact reserved for use by hardware functions.
|
|
|
|
*/
|
|
|
|
struct resource dsm;
|
2017-12-11 22:18:19 +07:00
|
|
|
/**
|
|
|
|
* Reserved portion of Data Stolen Memory
|
|
|
|
*/
|
|
|
|
struct resource dsm_reserved;
|
2017-12-11 22:18:18 +07:00
|
|
|
|
2017-12-11 22:18:21 +07:00
|
|
|
/*
|
|
|
|
* Stolen memory is segmented in hardware with different portions
|
|
|
|
* off-limits to certain functions.
|
|
|
|
*
|
|
|
|
* The drm_mm is initialised to the total accessible range, as found
|
|
|
|
* from the PCI config. On Broadwell+, this is further restricted to
|
|
|
|
* avoid the first page! The upper end of stolen memory is reserved for
|
|
|
|
* hardware functions and similarly removed from the accessible range.
|
|
|
|
*/
|
2017-12-11 22:18:22 +07:00
|
|
|
resource_size_t stolen_usable_size; /* Total size minus reserved ranges */
|
2017-12-11 22:18:21 +07:00
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
void __iomem *regs;
|
|
|
|
|
2013-07-20 02:36:52 +07:00
|
|
|
struct intel_uncore uncore;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
2015-02-10 18:05:47 +07:00
|
|
|
struct i915_virtual_gpu vgpu;
|
|
|
|
|
2016-10-20 16:15:03 +07:00
|
|
|
struct intel_gvt *gvt;
|
drm/i915: gvt: Introduce the basic architecture of GVT-g
This patch introduces the very basic framework of GVT-g device model,
includes basic prototypes, definitions, initialization.
v12:
- Call intel_gvt_init() in driver early initialization stage. (Chris)
v8:
- Remove the GVT idr and mutex in intel_gvt_host. (Joonas)
v7:
- Refine the URL link in Kconfig. (Joonas)
- Refine the introduction of GVT-g host support in Kconfig. (Joonas)
- Remove the macro GVT_ALIGN(), use round_down() instead. (Joonas)
- Make "struct intel_gvt" a data member in struct drm_i915_private.(Joonas)
- Remove {alloc, free}_gvt_device()
- Rename intel_gvt_{create, destroy}_gvt_device()
- Expose intel_gvt_init_host()
- Remove the dummy "struct intel_gvt" declaration in intel_gvt.h (Joonas)
v6:
- Refine introduction in Kconfig. (Chris)
- The exposed API functions will take struct intel_gvt * instead of
void *. (Chris/Tvrtko)
- Remove most members of struct intel_gvt_device_info. Will add them
in the device model patches.(Chris)
- Remove gvt_info() and gvt_err() in debug.h. (Chris)
- Move GVT kernel parameter into i915_params. (Chris)
- Remove include/drm/i915_gvt.h, as GVT-g will be built within i915.
- Remove the redundant struct i915_gvt *, as the functions in i915
will directly take struct intel_gvt *.
- Add more comments for reviewer.
v5:
Take Tvrtko's comments:
- Fix the misspelled words in Kconfig
- Let functions take drm_i915_private * instead of struct drm_device *
- Remove redundant prints/local variable initialization
v3:
Take Joonas' comments:
- Change file name i915_gvt.* to intel_gvt.*
- Move GVT kernel parameter into intel_gvt.c
- Remove redundant debug macros
- Change error handling style
- Add introductions for some stub functions
- Introduce drm/i915_gvt.h.
Take Kevin's comments:
- Move GVT-g host/guest check into intel_vgt_balloon in i915_gem_gtt.c
v2:
- Introduce i915_gvt.c.
It's necessary to introduce the stubs between i915 driver and GVT-g host,
as GVT-g components is configurable in kernel config. When disabled, the
stubs here do nothing.
Take Joonas' comments:
- Replace boolean return value with int.
- Replace customized info/warn/debug macros with DRM macros.
- Document all non-static functions like i915.
- Remove empty and unused functions.
- Replace magic number with macros.
- Set GVT-g in kernel config to "n" by default.
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1466078825-6662-5-git-send-email-zhi.a.wang@intel.com
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
2016-06-16 19:07:00 +07:00
|
|
|
|
2018-03-14 07:32:50 +07:00
|
|
|
struct intel_wopcm wopcm;
|
|
|
|
|
2017-01-18 23:05:53 +07:00
|
|
|
struct intel_huc huc;
|
2015-08-12 21:43:36 +07:00
|
|
|
struct intel_guc guc;
|
|
|
|
|
drm/i915/skl: Add support to load SKL CSR firmware.
Display Context Save and Restore support is needed for
various SKL Display C states like DC5, DC6.
This implementation is added based on first version of DMC CSR program
that we received from h/w team.
Here we are using request_firmware based design.
Finally this firmware should end up in linux-firmware tree.
For SKL platform its mandatory to ensure that we load this
csr program before enabling DC states like DC5/DC6.
As CSR program gets reset on various conditions, we should ensure
to load it during boot and in future change to be added to load
this system resume sequence too.
v1: Initial release as RFC patch
v2: Design change as per Daniel, Damien and Shobit's review comments
request firmware method followed.
v3: Some optimization and functional changes.
Pulled register defines into drivers/gpu/drm/i915/i915_reg.h
Used kmemdup to allocate and duplicate firmware content.
Ensured to free allocated buffer.
v4: Modified as per review comments from Satheesh and Daniel
Removed temporary buffer.
Optimized number of writes by replacing I915_WRITE with I915_WRITE64.
v5:
Modified as per review comments from Damien.
- Changed name for functions and firmware.
- Introduced HAS_CSR.
- Reverted back previous change and used csr_buf with u8 size.
- Using cpu_to_be64 for endianness change.
Modified as per review comments from Imre.
- Modified registers and macro names to be a bit closer to bspec terminology
and the existing register naming in the driver.
- Early return for non SKL platforms in intel_load_csr_program function.
- Added locking around CSR program load function as it may be called
concurrently during system/runtime resume.
- Releasing the fw before loading the program for consistency
- Handled error path during f/w load.
v6: Modified as per review comments from Imre.
- Corrected out_freecsr sequence.
v7: Modified as per review comments from Imre.
Fail loading fw if fw->size%8!=0.
v8: Rebase to latest.
v9: Rebase on top of -nightly (Damien)
v10: Enabled support for dmc firmware ver 1.0.
According to ver 1.0 in a single binary package all the firmware's that are
required for different stepping's of the product will be stored. The package
contains the css header, followed by the package header and the actual dmc
firmwares. Package header contains the firmware/stepping mapping table and
the corresponding firmware offsets to the individual binaries, within the
package. Each individual program binary contains the header and the payload
sections whose size is specified in the header section. This changes are done
to extract the specific firmware from the package. (Animesh)
v11: Modified as per review comments from Imre.
- Added code comment from bspec for header structure elements.
- Added __packed to avoid structure padding.
- Added helper functions for stepping and substepping info.
- Added code comment for CSR_MAX_FW_SIZE.
- Disabled BXT firmware loading, will be enabled with dmc 1.0 support.
- Changed skl_stepping_info based on bspec, earlier used from config DB.
- Removed duplicate call of cpu_to_be* from intel_csr_load_program function.
- Used cpu_to_be32 instead of cpu_to_be64 as firmware binary in dword aligned.
- Added sanity check for header length.
- Added sanity check for mmio address got from firmware binary.
- kmalloc done separately for dmc header and dmc firmware. (Animesh)
v12: Modified as per review comments from Imre.
- Corrected the typo error in skl stepping info structure.
- Added out-of-bound access for skl_stepping_info.
- Sanity check for mmio address modified.
- Sanity check added for stepping and substepping.
- Modified the intel_dmc_info structure, cache only the required header info. (Animesh)
v13: clarify firmware load error message.
The reason for a firmware loading failure can be obscure if the driver
is built-in. Provide an explanation to the user about the likely reason for
the failure and how to resolve it. (Imre)
v14: Suggested by Jani.
- fix s/I915/CONFIG_DRM_I915/ typo
- add fw_path to the firmware object instead of using a static ptr (Jani)
v15:
1) Changed the firmware name as dmc_gen9.bin, everytime for a new firmware version a symbolic link
with same name will help not to build kernel again.
2) Changes done as per review comments from Imre.
- Error check removed for intel_csr_ucode_init.
- Moved csr-specific data structure to intel_csr.h and optimization done on structure definition.
- fw->data used directly for parsing the header info & memory allocation
only done separately for payload. (Animesh)
v16:
- No need for out_regs label in i915_driver_load(), so removed it.
- Changed the firmware name as skl_dmc_ver1.bin, followed naming convention <platform>_dmc_<api-version>.bin (Animesh)
Issue: VIZ-2569
Signed-off-by: A.Sunil Kamath <sunil.kamath@intel.com>
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-05-04 19:58:44 +07:00
|
|
|
struct intel_csr csr;
|
|
|
|
|
2015-04-01 14:55:04 +07:00
|
|
|
struct intel_gmbus gmbus[GMBUS_NUM_PINS];
|
2012-12-01 19:53:45 +07:00
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
/** gmbus_mutex protects against concurrent usage of the single hw gmbus
|
|
|
|
* controller on different i2c buses. */
|
|
|
|
struct mutex gmbus_mutex;
|
|
|
|
|
|
|
|
/**
|
2018-07-28 02:36:47 +07:00
|
|
|
* Base address of where the gmbus and gpio blocks are located (either
|
|
|
|
* on PCH or on SoC for platforms without PCH).
|
2012-11-03 01:55:02 +07:00
|
|
|
*/
|
|
|
|
uint32_t gpio_mmio_base;
|
|
|
|
|
2014-05-19 22:24:03 +07:00
|
|
|
/* MMIO base address for MIPI regs */
|
|
|
|
uint32_t mipi_mmio_base;
|
|
|
|
|
2015-11-12 01:34:15 +07:00
|
|
|
uint32_t psr_mmio_base;
|
|
|
|
|
2016-08-10 18:07:29 +07:00
|
|
|
uint32_t pps_mmio_base;
|
|
|
|
|
2012-12-01 19:53:45 +07:00
|
|
|
wait_queue_head_t gmbus_wait_queue;
|
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
struct pci_dev *bridge_dev;
|
drm/i915: Allocate intel_engine_cs structure only for the enabled engines
With the possibility of addition of many more number of rings in future,
the drm_i915_private structure could bloat as an array, of type
intel_engine_cs, is embedded inside it.
struct intel_engine_cs engine[I915_NUM_ENGINES];
Though this is still fine as generally there is only a single instance of
drm_i915_private structure used, but not all of the possible rings would be
enabled or active on most of the platforms. Some memory can be saved by
allocating intel_engine_cs structure only for the enabled/active engines.
Currently the engine/ring ID is kept static and dev_priv->engine[] is simply
indexed using the enums defined in intel_engine_id.
To save memory and continue using the static engine/ring IDs, 'engine' is
defined as an array of pointers.
struct intel_engine_cs *engine[I915_NUM_ENGINES];
dev_priv->engine[engine_ID] will be NULL for disabled engine instances.
There is a text size reduction of 928 bytes, from 1028200 to 1027272, for
i915.o file (but for i915.ko file text size remain same as 1193131 bytes).
v2:
- Remove the engine iterator field added in drm_i915_private structure,
instead pass a local iterator variable to the for_each_engine**
macros. (Chris)
- Do away with intel_engine_initialized() and instead directly use the
NULL pointer check on engine pointer. (Chris)
v3:
- Remove for_each_engine_id() macro, as the updated macro for_each_engine()
can be used in place of it. (Chris)
- Protect the access to Render engine Fault register with a NULL check, as
engine specific init is done later in Driver load sequence.
v4:
- Use !!dev_priv->engine[VCS] style for the engine check in getparam. (Chris)
- Kill the superfluous init_engine_lists().
v5:
- Cleanup the intel_engines_init() & intel_engines_setup(), with respect to
allocation of intel_engine_cs structure. (Chris)
v6:
- Rebase.
v7:
- Optimize the for_each_engine_masked() macro. (Chris)
- Change the type of 'iter' local variable to enum intel_engine_id. (Chris)
- Rebase.
v8: Rebase.
v9: Rebase.
v10:
- For index calculation use engine ID instead of pointer based arithmetic in
intel_engine_sync_index() as engine pointers are not contiguous now (Chris)
- For appropriateness, rename local enum variable 'iter' to 'id'. (Joonas)
- Use for_each_engine macro for cleanup in intel_engines_init() and remove
check for NULL engine pointer in cleanup() routines. (Joonas)
v11: Rebase.
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1476378888-7372-1-git-send-email-akash.goel@intel.com
2016-10-14 00:14:48 +07:00
|
|
|
struct intel_engine_cs *engine[I915_NUM_ENGINES];
|
2017-10-04 03:34:48 +07:00
|
|
|
/* Context used internally to idle the GPU and setup initial state */
|
|
|
|
struct i915_gem_context *kernel_context;
|
|
|
|
/* Context only to be used for injecting preemption commands */
|
|
|
|
struct i915_gem_context *preempt_context;
|
drm/i915/pmu: Expose a PMU interface for perf queries
From: Chris Wilson <chris@chris-wilson.co.uk>
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
From: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
The first goal is to be able to measure GPU (and individual ring) busyness
without having to poll registers from userspace. (Which not only incurs
holding the forcewake lock indefinitely, perturbing the system, but also
runs the risk of hanging the machine.) As an alternative we can use the
perf event counter interface to sample the ring registers periodically
and send those results to userspace.
Functionality we are exporting to userspace is via the existing perf PMU
API and can be exercised via the existing tools. For example:
perf stat -a -e i915/rcs0-busy/ -I 1000
Will print the render engine busyness once per second. All the performance
counters can be enumerated (perf list) and have their unit of measure
correctly reported in sysfs.
v1-v2 (Chris Wilson):
v2: Use a common timer for the ring sampling.
v3: (Tvrtko Ursulin)
* Decouple uAPI from i915 engine ids.
* Complete uAPI defines.
* Refactor some code to helpers for clarity.
* Skip sampling disabled engines.
* Expose counters in sysfs.
* Pass in fake regs to avoid null ptr deref in perf core.
* Convert to class/instance uAPI.
* Use shared driver code for rc6 residency, power and frequency.
v4: (Dmitry Rogozhkin)
* Register PMU with .task_ctx_nr=perf_invalid_context
* Expose cpumask for the PMU with the single CPU in the mask
* Properly support pmu->stop(): it should call pmu->read()
* Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE)
* Introduce refcounting of event subscriptions.
* Make pmu.busy_stats a refcounter to avoid busy stats going away
with some deleted event.
* Expose cpumask for i915 PMU to avoid multiple events creation of
the same type followed by counter aggregation by perf-stat.
* Track CPUs getting online/offline to migrate perf context. If (likely)
cpumask will initially set CPU0, CONFIG_BOOTPARAM_HOTPLUG_CPU0 will be
needed to see effect of CPU status tracking.
* End result is that only global events are supported and perf stat
works correctly.
* Deny perf driver level sampling - it is prohibited for uncore PMU.
v5: (Tvrtko Ursulin)
* Don't hardcode number of engine samplers.
* Rewrite event ref-counting for correctness and simplicity.
* Store initial counter value when starting already enabled events
to correctly report values to all listeners.
* Fix RC6 residency readout.
* Comments, GPL header.
v6:
* Add missing entry to v4 changelog.
* Fix accounting in CPU hotplug case by copying the approach from
arch/x86/events/intel/cstate.c. (Dmitry Rogozhkin)
v7:
* Log failure message only on failure.
* Remove CPU hotplug notification state on unregister.
v8:
* Fix error unwind on failed registration.
* Checkpatch cleanup.
v9:
* Drop the energy metric, it is available via intel_rapl_perf.
(Ville Syrjälä)
* Use HAS_RC6(p). (Chris Wilson)
* Handle unsupported non-engine events. (Dmitry Rogozhkin)
* Rebase for intel_rc6_residency_ns needing caller managed
runtime pm.
* Drop HAS_RC6 checks from the read callback since creating those
events will be rejected at init time already.
* Add counter units to sysfs so perf stat output is nicer.
* Cleanup the attribute tables for brevity and readability.
v10:
* Fixed queued accounting.
v11:
* Move intel_engine_lookup_user to intel_engine_cs.c
* Commit update. (Joonas Lahtinen)
v12:
* More accurate sampling. (Chris Wilson)
* Store and report frequency in MHz for better usability from
perf stat.
* Removed metrics: queued, interrupts, rc6 counters.
* Sample engine busyness based on seqno difference only
for less MMIO (and forcewake) on all platforms. (Chris Wilson)
v13:
* Comment spelling, use mul_u32_u32 to work around potential GCC
issue and some code alignment changes. (Chris Wilson)
v14:
* Rebase.
v15:
* Rebase for RPS refactoring.
v16:
* Use the dynamic slot in the CPU hotplug state machine so that we are
free to setup our state as multi-instance. Previously we were re-using
the CPUHP_AP_PERF_X86_UNCORE_ONLINE slot which is neither used as
multi-instance, nor owned by our driver to start with.
* Register the CPU hotplug handlers after the PMU, otherwise the callback
will get called before the PMU is initialized which can end up in
perf_pmu_migrate_context with an un-initialized base.
* Added workaround for a probable bug in cpuhp core.
v17:
* Remove workaround for the cpuhp bug.
v18:
* Rebase for drm_i915_gem_engine_class getting upstream before us.
v19:
* Rebase. (trivial)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171121181852.16128-2-tvrtko.ursulin@linux.intel.com
2017-11-22 01:18:45 +07:00
|
|
|
struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
|
|
|
|
[MAX_ENGINE_INSTANCE + 1];
|
2012-11-03 01:55:02 +07:00
|
|
|
|
|
|
|
struct resource mch_res;
|
|
|
|
|
|
|
|
/* protects the irq masks */
|
|
|
|
spinlock_t irq_lock;
|
|
|
|
|
2014-03-05 00:23:07 +07:00
|
|
|
bool display_irqs_enabled;
|
|
|
|
|
drm/i915: irq-drive the dp aux communication
At least on the platforms that have a dp aux irq and also have it
enabled - vlvhsw should have one, too. But I don't have a machine to
test this on. Judging from docs there's no dp aux interrupt for gm45.
Also, I only have an ivb cpu edp machine, so the dp aux A code for
snb/ilk is untested.
For dpcd probing when nothing is connected it slashes about 5ms of cpu
time (cpu time is now negligible), which agrees with 3 * 5 400 usec
timeouts.
A previous version of this patch increases the time required to go
through the dp_detect cycle (which includes reading the edid) from
around 33 ms to around 40 ms. Experiments indicated that this is
purely due to the irq latency - the hw doesn't allow us to queue up
dp aux transactions and hence irq latency directly affects throughput.
gmbus is much better, there we have a 8 byte buffer, and we get the
irq once another 4 bytes can be queued up.
But by using the pm_qos interface to request the lowest possible cpu
wake-up latency this slowdown completely disappeared.
Since all our output detection logic is single-threaded with the
mode_config mutex right now anyway, I've decided not to play fancy and
to just reuse the gmbus wait queue. But this would definitely prep the
way to run dp detection on different ports in parallel
v2: Add a timeout for dp aux transfers when using interrupts - the hw
_does_ prevent this with the hw-based 400 usec timeout, but if the
irq somehow doesn't arrive we're screwed. Lesson learned while
developing this ;-)
v3: While at it also convert the busy-loop to wait_for_atomic, so that
we don't run the risk of an infinite loop any more.
v4: Ensure we have the smallest possible irq latency by using the
pm_qos interface.
v5: Add a comment to the code to explain why we frob pm_qos. Suggested
by Chris Wilson.
v6: Disable dp irq for vlv, that's easier than trying to get at docs
and hw.
v7: Squash in a fix for Haswell that Paulo Zanoni tracked down - the
dp aux registers aren't at a fixed offset any more, but can be on the
PCH while the DP port is on the cpu die.
Reviewed-by: Imre Deak <imre.deak@intel.com> (v6)
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-12-01 19:53:48 +07:00
|
|
|
/* To control wakeup latency, e.g. for irq-driven dp aux transfers. */
|
|
|
|
struct pm_qos_request pm_qos;
|
|
|
|
|
2015-05-27 00:42:30 +07:00
|
|
|
/* Sideband mailbox protection */
|
|
|
|
struct mutex sb_lock;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
|
|
|
/** Cached value of IMR to avoid reads in updating the bitfield */
|
drm/i915/bdw: Implement interrupt changes
The interrupt handling implementation remains the same as previous
generations with the 4 types of registers, status, identity, mask, and
enable. However the layout of where the bits go have changed entirely.
To address these changes, all of the interrupt vfuncs needed special
gen8 code.
The way it works is there is a top level status register now which
informs the interrupt service routine which unit caused the interrupt,
and therefore which interrupt registers to read to process the
interrupt. For display the division is quite logical, a set of interrupt
registers for each pipe, and in addition to those, a set each for "misc"
and port.
For GT the things get a bit hairy, as seen by the code. Each of the GT
units has its own bits defined. They all look *very similar* and
reside in 16 bits of a GT register. As an example, RCS and BCS share
register 0. To compact the code a bit, at a slight expense to
complexity, this is exactly how the code works as well. 2 structures are
added to the ring buffer so that our ring buffer interrupt handling code
knows which ring shares the interrupt registers, and a shift value (ie.
the top or bottom 16 bits of the register).
The above allows us to keep the interrupt register caching scheme, the
per interrupt enables, and the code to mask and unmask interrupts
relatively clean (again at the cost of some more complexity).
Most of the GT units mentioned above are command streamers, and so the
symmetry should work quite well for even the yet to be implemented rings
which Broadwell adds.
v2: Fixes up a couple of bugs, and is more verbose about errors in the
Broadwell interrupt handler.
v3: fix DE_MISC IER offset
v4: Simplify interrupts:
I totally misread the docs the first time I implemented interrupts, and
so this should greatly simplify the mess. Unlike GEN6, we never touch
the regular mask registers in irq_get/put.
v5: Rebased on top of recent pch hotplug setup changes.
v6: Fixup on top of moving num_pipes to intel_info.
v7: Rebased on top of Egbert Eich's hpd irq handling rework. Also
wired up ibx_hpd_irq_setup for gen8.
v8: Rebase on top of Jani's asle handling rework.
v9: Rebase on top of Ben's VECS enabling for Haswell, where he
unfortunately went OCD on the gt irq #defines. Not that they're still
not yet fully consistent:
- Used the GT_RENDER_ #defines + bdw shifts.
- Dropped the shift from the L3_PARITY stuff, seemed clearer.
- s/irq_refcount/irq_refcount.gt/
v10: Squash in VECS enabling patches and the gen8_gt_irq_handler
refactoring from Zhao Yakui <yakui.zhao@intel.com>
v11: Rebase on top of the interrupt cleanups in upstream.
v12: Rebase on top of Ben's DPF changes in upstream.
v13: Drop bdw from the HAS_L3_DPF feature flag for now, it's unclear what
exactly needs to be done. Requested by Ben.
v14: Fix the patch.
- Drop the mask of reserved bits and assorted logic, it doesn't match
the spec.
- Do the posting read unconditionally instead of commenting it out.
- Add a GEN8_MASTER_IRQ_CONTROL definition and use it.
- Fix up the GEN8_PIPE interrupt defines and give the GEN8_ prefixes -
we actually will need to use them.
- Enclose macros in do {} while (0) (checkpatch).
- Clear DE_MISC interrupt bits only after having processed them.
- Fix whitespace fail (checkpatch).
- Fix overtly long lines where appropriate (checkpatch).
- Don't use typedef'ed private_t (maintainer-scripts).
- Align the function parameter list correctly.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v4)
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
bikeshed
2013-11-03 11:07:09 +07:00
|
|
|
union {
|
|
|
|
u32 irq_mask;
|
|
|
|
u32 de_irq_mask[I915_MAX_PIPES];
|
|
|
|
};
|
2012-11-03 01:55:02 +07:00
|
|
|
u32 gt_irq_mask;
|
2016-10-12 23:24:30 +07:00
|
|
|
u32 pm_imr;
|
|
|
|
u32 pm_ier;
|
2014-03-15 21:53:22 +07:00
|
|
|
u32 pm_rps_events;
|
2016-10-12 23:24:31 +07:00
|
|
|
u32 pm_guc_events;
|
2014-02-10 23:42:49 +07:00
|
|
|
u32 pipestat_irq_mask[I915_MAX_PIPES];
|
2012-11-03 01:55:02 +07:00
|
|
|
|
2015-05-27 19:03:42 +07:00
|
|
|
struct i915_hotplug hotplug;
|
2016-01-12 02:44:36 +07:00
|
|
|
struct intel_fbc fbc;
|
2014-04-05 13:43:28 +07:00
|
|
|
struct i915_drrs drrs;
|
2012-11-03 01:55:02 +07:00
|
|
|
struct intel_opregion opregion;
|
2013-05-10 06:03:18 +07:00
|
|
|
struct intel_vbt_data vbt;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
2014-10-10 02:57:43 +07:00
|
|
|
bool preserve_bios_swizzle;
|
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
/* overlay */
|
|
|
|
struct intel_overlay *overlay;
|
|
|
|
|
2013-11-08 21:48:54 +07:00
|
|
|
/* backlight registers and fields in struct intel_panel */
|
2014-09-15 19:35:09 +07:00
|
|
|
struct mutex backlight_lock;
|
2013-04-02 19:48:09 +07:00
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
/* LVDS info */
|
|
|
|
bool no_aux_handshake;
|
|
|
|
|
2014-09-04 18:53:14 +07:00
|
|
|
/* protects panel power sequencer state */
|
|
|
|
struct mutex pps_mutex;
|
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
|
|
|
|
int num_fence_regs; /* 8 on pre-965, 16 otherwise */
|
|
|
|
|
|
|
|
unsigned int fsb_freq, mem_freq, is_ddr3;
|
2016-05-14 03:41:27 +07:00
|
|
|
unsigned int skl_preferred_vco_freq;
|
2017-02-08 01:33:45 +07:00
|
|
|
unsigned int max_cdclk_freq;
|
2016-11-14 23:35:10 +07:00
|
|
|
|
2015-08-18 18:36:59 +07:00
|
|
|
unsigned int max_dotclk_freq;
|
2016-03-02 22:22:13 +07:00
|
|
|
unsigned int rawclk_freq;
|
2014-10-07 21:41:22 +07:00
|
|
|
unsigned int hpll_freq;
|
2017-11-05 20:49:05 +07:00
|
|
|
unsigned int fdi_pll_freq;
|
2015-09-25 03:29:18 +07:00
|
|
|
unsigned int czclk_freq;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
2016-05-14 03:41:32 +07:00
|
|
|
struct {
|
2017-01-21 01:21:59 +07:00
|
|
|
/*
|
|
|
|
* The current logical cdclk state.
|
|
|
|
* See intel_atomic_state.cdclk.logical
|
|
|
|
*
|
|
|
|
* For reading holding any crtc lock is sufficient,
|
|
|
|
* for writing must hold all of them.
|
|
|
|
*/
|
|
|
|
struct intel_cdclk_state logical;
|
|
|
|
/*
|
|
|
|
* The current actual cdclk state.
|
|
|
|
* See intel_atomic_state.cdclk.actual
|
|
|
|
*/
|
|
|
|
struct intel_cdclk_state actual;
|
|
|
|
/* The current hardware cdclk state */
|
2017-02-08 01:33:45 +07:00
|
|
|
struct intel_cdclk_state hw;
|
|
|
|
} cdclk;
|
2016-05-14 03:41:32 +07:00
|
|
|
|
2013-09-02 21:22:25 +07:00
|
|
|
/**
|
|
|
|
* wq - Driver workqueue for GEM.
|
|
|
|
*
|
|
|
|
* NOTE: Work items scheduled here are not allowed to grab any modeset
|
|
|
|
* locks, for otherwise the flushing done in the pageflip code will
|
|
|
|
* result in deadlocks.
|
|
|
|
*/
|
2012-11-03 01:55:02 +07:00
|
|
|
struct workqueue_struct *wq;
|
|
|
|
|
2017-11-13 20:36:22 +07:00
|
|
|
/* ordered wq for modesets */
|
|
|
|
struct workqueue_struct *modeset_wq;
|
|
|
|
|
2012-11-03 01:55:02 +07:00
|
|
|
/* Display functions */
|
|
|
|
struct drm_i915_display_funcs display;
|
|
|
|
|
|
|
|
/* PCH chipset type */
|
|
|
|
enum intel_pch pch_type;
|
2012-11-21 00:12:07 +07:00
|
|
|
unsigned short pch_id;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
|
|
|
unsigned long quirks;
|
|
|
|
|
2016-02-16 16:06:14 +07:00
|
|
|
struct drm_atomic_state *modeset_restore_state;
|
2016-08-06 03:28:27 +07:00
|
|
|
struct drm_modeset_acquire_ctx reset_ctx;
|
2008-07-31 02:06:12 +07:00
|
|
|
|
2016-03-18 15:42:57 +07:00
|
|
|
struct i915_ggtt ggtt; /* VM representing the global address space */
|
2013-01-18 03:45:15 +07:00
|
|
|
|
2012-11-14 23:14:03 +07:00
|
|
|
struct i915_gem_mm mm;
|
2014-08-07 20:20:40 +07:00
|
|
|
DECLARE_HASHTABLE(mm_structs, 7);
|
|
|
|
struct mutex mm_lock;
|
2012-05-02 16:49:32 +07:00
|
|
|
|
2017-09-14 19:39:40 +07:00
|
|
|
struct intel_ppat ppat;
|
|
|
|
|
2012-05-02 16:49:32 +07:00
|
|
|
/* Kernel Modesetting */
|
|
|
|
|
2016-11-01 03:37:05 +07:00
|
|
|
struct intel_crtc *plane_to_crtc_mapping[I915_MAX_PIPES];
|
|
|
|
struct intel_crtc *pipe_to_crtc_mapping[I915_MAX_PIPES];
|
2009-11-18 23:25:18 +07:00
|
|
|
|
2013-10-22 02:04:07 +07:00
|
|
|
#ifdef CONFIG_DEBUG_FS
|
|
|
|
struct intel_pipe_crc pipe_crc[I915_MAX_PIPES];
|
|
|
|
#endif
|
|
|
|
|
2015-12-10 18:33:57 +07:00
|
|
|
/* dpll and cdclk state is protected by connection_mutex */
|
2013-06-05 18:34:06 +07:00
|
|
|
int num_shared_dpll;
|
|
|
|
struct intel_shared_dpll shared_dplls[I915_NUM_PLLS];
|
2016-03-08 22:46:22 +07:00
|
|
|
const struct intel_dpll_mgr *dpll_mgr;
|
2015-12-10 18:33:57 +07:00
|
|
|
|
2016-03-23 20:51:12 +07:00
|
|
|
/*
|
|
|
|
* dpll_lock serializes intel_{prepare,enable,disable}_shared_dpll.
|
|
|
|
* Must be global rather than per dpll, because on some platforms
|
|
|
|
* plls share registers.
|
|
|
|
*/
|
|
|
|
struct mutex dpll_lock;
|
|
|
|
|
2015-12-10 18:33:57 +07:00
|
|
|
unsigned int active_crtcs;
|
2017-08-31 01:57:03 +07:00
|
|
|
/* minimum acceptable cdclk for each pipe */
|
|
|
|
int min_cdclk[I915_MAX_PIPES];
|
2017-10-24 16:52:14 +07:00
|
|
|
/* minimum acceptable voltage level for each pipe */
|
|
|
|
u8 min_voltage_level[I915_MAX_PIPES];
|
2015-12-10 18:33:57 +07:00
|
|
|
|
2013-11-06 13:36:35 +07:00
|
|
|
int dpio_phy_iosf_port[I915_NUM_PHYS_VLV];
|
2012-04-20 23:11:53 +07:00
|
|
|
|
2014-10-07 21:21:26 +07:00
|
|
|
struct i915_workarounds workarounds;
|
2014-08-26 20:44:51 +07:00
|
|
|
|
drm/i915: Track frontbuffer invalidation/flushing
So these are the guts of the new beast. This tracks when a frontbuffer
gets invalidated (due to frontbuffer rendering) and hence should be
constantly scanned out, and when it's flushed again and can be
compressed/one-shot-upload.
Rules for flushing are simple: The frontbuffer needs one more full
upload starting from the next vblank. Which means that the flushing
can _only_ be called once the frontbuffer update has been latched.
But this poses a problem for pageflips: We can't just delay the
flushing until the pageflip is latched, since that would pose the risk
that we override frontbuffer rendering that has been scheduled
in-between the pageflip ioctl and the actual latching.
To handle this track asynchronous invalidations (and also pageflip)
state per-ring and delay any in-between flushing until the rendering
has completed. And also cancel any delayed flushing if we get a new
invalidation request (whether delayed or not).
Also call intel_mark_fb_busy in both cases in all cases to make sure
that we keep the screen at the highest refresh rate both on flips,
synchronous plane updates and for frontbuffer rendering.
v2: Lots of improvements
Suggestions from Chris:
- Move invalidate/flush in flush_*_domain and set_to_*_domain.
- Drop the flush in busy_ioctl since it's redundant. Was a leftover
from an earlier concept to track flips/delayed flushes.
- Don't forget about the initial modeset enable/final disable.
Suggested by Chris.
Track flips accurately, too. Since flips complete independently of
rendering we need to track pending flips in a separate mask. Again if
an invalidate happens we need to cancel the eventual flush to avoid
races.
v3:
Provide correct header declarations for flip functions. Currently not
needed outside of intel_display.c, but part of the proper interface.
v4: Add proper domain management to fbcon so that the fbcon buffer is
also tracked correctly.
v5: Fixup locking around the fbcon set_to_gtt_domain call.
v6: More comments from Chris:
- Split out fbcon changes.
- Drop superfluous checks for potential scanout before calling intel_fb
functions - we can micro-optimize this later.
- s/intel_fb_/intel_fb_obj_/ to make it clear that this deals in gem
object. We already have precedence for fb_obj in the pin_and_fence
functions.
v7: Clarify the semantics of the flip flush handling by renaming
things a bit:
- Don't go through a gem object but take the relevant frontbuffer bits
directly. These functions center on the plane, the actual object is
irrelevant - even a flip to the same object as already active should
cause a flush.
- Add a new intel_frontbuffer_flip for synchronous plane updates. It
currently just calls intel_frontbuffer_flush since the implementation
differs.
This way we achieve a clear split between one-shot update events on
one side and frontbuffer rendering with potentially a very long delay
between the invalidate and flush.
Chris and I also had some discussions about mark_busy and whether it
is appropriate to call from flush. But mark busy is a state which
should be derived from the 3 events (invalidate, flush, flip) we now
have by the users, like psr does by tracking relevant information in
psr.busy_frontbuffer_bits. DRRS (the only real use of mark_busy for
frontbuffer) needs to have similar logic. With that the overall
mark_busy in the core could be removed.
v8: Only when retiring gpu buffers only flush frontbuffer bits we
actually invalidated in a batch. Just for safety since before any
additional usage/invalidate we should always retire current rendering.
Suggested by Chris Wilson.
v9: Actually use intel_frontbuffer_flip in all appropriate places.
Spotted by Chris.
v10: Address more comments from Chris:
- Don't call _flip in set_base when the crtc is inactive, avoids redundancy
in the modeset case with the initial enabling of all planes.
- Add comments explaining that the initial/final plane enable/disable
still has work left to do before it's fully generic.
v11: Only invalidate for gtt/cpu access when writing. Spotted by Chris.
v12: s/_flush/_flip/ in intel_overlay.c per Chris' comment.
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-06-19 21:01:59 +07:00
|
|
|
struct i915_frontbuffer_tracking fb_tracking;
|
|
|
|
|
2017-01-24 04:29:39 +07:00
|
|
|
struct intel_atomic_helper {
|
|
|
|
struct llist_head free_list;
|
|
|
|
struct work_struct free_work;
|
|
|
|
} atomic_helper;
|
|
|
|
|
2009-08-18 03:31:43 +07:00
|
|
|
u16 orig_clock;
|
2010-01-30 02:27:07 +07:00
|
|
|
|
2009-12-17 13:48:43 +07:00
|
|
|
bool mchbar_need_disable;
|
2010-01-30 02:27:07 +07:00
|
|
|
|
2012-11-03 01:55:07 +07:00
|
|
|
struct intel_l3_parity l3_parity;
|
|
|
|
|
2013-07-05 01:02:05 +07:00
|
|
|
/* Cannot be determined by PCIID. You must always read a register. */
|
2016-04-13 21:26:43 +07:00
|
|
|
u32 edram_cap;
|
2013-07-05 01:02:05 +07:00
|
|
|
|
2017-10-11 04:30:05 +07:00
|
|
|
/*
|
|
|
|
* Protects RPS/RC6 register access and PCU communication.
|
|
|
|
* Must be taken after struct_mutex if nested. Note that
|
|
|
|
* this lock may be held for long periods of time when
|
|
|
|
* talking to hw - so only take it when talking to hw!
|
|
|
|
*/
|
|
|
|
struct mutex pcu_lock;
|
|
|
|
|
2017-10-11 04:30:06 +07:00
|
|
|
/* gen6+ GT PM state */
|
|
|
|
struct intel_gen6_power_mgmt gt_pm;
|
2012-08-09 04:35:35 +07:00
|
|
|
|
2012-08-09 04:35:39 +07:00
|
|
|
/* ilk-only ips/rps state. Everything in here is protected by the global
|
|
|
|
* mchdev_lock in intel_pm.c */
|
2012-11-03 01:55:03 +07:00
|
|
|
struct intel_ilk_power_mgmt ips;
|
2010-02-06 03:42:41 +07:00
|
|
|
|
2013-10-25 21:36:47 +07:00
|
|
|
struct i915_power_domains power_domains;
|
2013-05-30 21:07:11 +07:00
|
|
|
|
2013-10-04 02:15:06 +07:00
|
|
|
struct i915_psr psr;
|
2013-07-12 04:45:00 +07:00
|
|
|
|
2012-11-14 23:14:04 +07:00
|
|
|
struct i915_gpu_error gpu_error;
|
2010-10-01 20:57:56 +07:00
|
|
|
|
2013-05-09 00:45:13 +07:00
|
|
|
struct drm_i915_gem_object *vlv_pctx;
|
|
|
|
|
2010-03-30 12:34:14 +07:00
|
|
|
/* list of fbdev register on this device */
|
|
|
|
struct intel_fbdev *fbdev;
|
2014-08-13 19:09:46 +07:00
|
|
|
struct work_struct fbdev_suspend_work;
|
2011-02-22 05:23:52 +07:00
|
|
|
|
|
|
|
struct drm_property *broadcast_rgb_property;
|
2011-05-13 04:17:24 +07:00
|
|
|
struct drm_property *force_audio_property;
|
2012-05-26 06:56:22 +07:00
|
|
|
|
2015-01-08 22:54:14 +07:00
|
|
|
/* hda/i915 audio component */
|
2015-08-19 15:48:56 +07:00
|
|
|
struct i915_audio_component *audio_component;
|
2015-01-08 22:54:14 +07:00
|
|
|
bool audio_component_registered;
|
2015-09-02 13:11:39 +07:00
|
|
|
/**
|
|
|
|
* av_mutex - mutex for audio/video sync
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
struct mutex av_mutex;
|
2015-01-08 22:54:14 +07:00
|
|
|
|
2017-06-20 18:05:45 +07:00
|
|
|
struct {
|
2018-09-04 22:31:17 +07:00
|
|
|
struct mutex mutex;
|
2017-06-20 18:05:45 +07:00
|
|
|
struct list_head list;
|
2017-06-20 18:05:46 +07:00
|
|
|
struct llist_head free_list;
|
|
|
|
struct work_struct free_work;
|
2017-06-20 18:05:45 +07:00
|
|
|
|
|
|
|
/* The hw wants to have a stable context identifier for the
|
|
|
|
* lifetime of the context (for OA, PASID, faults, etc).
|
|
|
|
* This is limited in execlists to 21 bits.
|
|
|
|
*/
|
|
|
|
struct ida hw_ida;
|
|
|
|
#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
|
2018-06-02 18:29:45 +07:00
|
|
|
#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
|
2018-03-02 23:14:58 +07:00
|
|
|
#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
|
2018-09-04 22:31:17 +07:00
|
|
|
struct list_head hw_id_list;
|
2017-06-20 18:05:45 +07:00
|
|
|
} contexts;
|
2012-11-03 01:55:02 +07:00
|
|
|
|
2012-12-12 01:48:29 +07:00
|
|
|
u32 fdi_rx_config;
|
2012-12-01 21:04:26 +07:00
|
|
|
|
2016-03-15 21:39:56 +07:00
|
|
|
/* Shadow for DISPLAY_PHY_CONTROL which can't be safely read */
|
2015-04-10 22:21:28 +07:00
|
|
|
u32 chv_phy_control;
|
2016-03-15 21:39:56 +07:00
|
|
|
/*
|
|
|
|
* Shadows for CHV DPLL_MD regs to keep the state
|
|
|
|
* checker somewhat working in the presence of hardware
|
|
|
|
* crappiness (can't read out DPLL_MD for pipes B & C).
|
|
|
|
*/
|
|
|
|
u32 chv_dpll_md[I915_MAX_PIPES];
|
2016-04-04 21:27:10 +07:00
|
|
|
u32 bxt_phy_grc;
|
2015-04-10 22:21:28 +07:00
|
|
|
|
2014-03-10 16:01:44 +07:00
|
|
|
u32 suspend_count;
|
2018-03-22 21:36:42 +07:00
|
|
|
bool power_domains_suspended;
|
2012-11-03 01:55:02 +07:00
|
|
|
struct i915_suspend_saved_registers regfile;
|
2014-05-05 19:19:56 +07:00
|
|
|
struct vlv_s0ix_state vlv_s0ix_state;
|
2012-11-03 01:55:05 +07:00
|
|
|
|
drm/i915/skl: Add support for the SAGV, fix underrun hangs
Since the watermark calculations for Skylake are still broken, we're apt
to hitting underruns very easily under multi-monitor configurations.
While it would be lovely if this was fixed, it's not. Another problem
that's been coming from this however, is the mysterious issue of
underruns causing full system hangs. An easy way to reproduce this with
a skylake system:
- Get a laptop with a skylake GPU, and hook up two external monitors to
it
- Move the cursor from the built-in LCD to one of the external displays
as quickly as you can
- You'll get a few pipe underruns, and eventually the entire system will
just freeze.
After doing a lot of investigation and reading through the bspec, I
found the existence of the SAGV, which is responsible for adjusting the
system agent voltage and clock frequencies depending on how much power
we need. According to the bspec:
"The display engine access to system memory is blocked during the
adjustment time. SAGV defaults to enabled. Software must use the
GT-driver pcode mailbox to disable SAGV when the display engine is not
able to tolerate the blocking time."
The rest of the bspec goes on to explain that software can simply leave
the SAGV enabled, and disable it when we use interlaced pipes/have more
than one pipe active.
Sure enough, with this patchset the system hangs resulting from pipe
underruns on Skylake have completely vanished on my T460s. Additionally,
the bspec mentions turning off the SAGV with more than one pipe enabled
as a workaround for display underruns. While this patch doesn't entirely
fix that, it looks like it does improve the situation a little bit so
it's likely this is going to be required to make watermarks on Skylake
fully functional.
This will still need additional work in the future: we shouldn't be
enabling the SAGV if any of the currently enabled planes can't enable WM
levels that introduce latencies >= 30 µs.
Changes since v11:
- Add skl_can_enable_sagv()
- Make sure we don't enable SAGV when not all planes can enable
watermarks >= the SAGV engine block time. I was originally going to
save this for later, but I recently managed to run into a machine
that was having problems with a single pipe configuration + SAGV.
- Make comparisons to I915_SKL_SAGV_NOT_CONTROLLED explicit
- Change I915_SAGV_DYNAMIC_FREQ to I915_SAGV_ENABLE
- Move printks outside of mutexes
- Don't print error messages twice
Changes since v10:
- Apparently sandybridge_pcode_read actually writes values and reads
them back, despite its misleading function name. This means we've
been doing this mostly wrong and have been writing garbage to the
SAGV control. Because of this, we no longer attempt to read the SAGV
status during initialization (since there are no helpers for this).
- mlankhorst noticed that this patch was breaking on some very early
pre-release Skylake machines, which apparently don't allow you to
disable the SAGV. To prevent machines from failing tests due to SAGV
errors, if the first time we try to control the SAGV results in the
mailbox indicating an invalid command, we just disable future attempts
to control the SAGV state by setting dev_priv->skl_sagv_status to
I915_SKL_SAGV_NOT_CONTROLLED and make a note of it in dmesg.
- Move mutex_unlock() a little higher in skl_enable_sagv(). This
doesn't actually fix anything, but lets us release the lock a little
sooner since we're finished with it.
Changes since v9:
- Only enable/disable sagv on Skylake
Changes since v8:
- Add intel_state->modeset guard to the conditional for
skl_enable_sagv()
Changes since v7:
- Remove GEN9_SAGV_LOW_FREQ, replace with GEN9_SAGV_IS_ENABLED (that's
all we use it for anyway)
- Use GEN9_SAGV_IS_ENABLED instead of 0x1 for clarification
- Fix a styling error that snuck past me
Changes since v6:
- Protect skl_enable_sagv() with intel_state->modeset conditional in
intel_atomic_commit_tail()
Changes since v5:
- Don't use is_power_of_2. Makes things confusing
- Don't use the old state to figure out whether or not to
enable/disable the sagv, use the new one
- Split the loop in skl_disable_sagv into its own function
- Move skl_sagv_enable/disable() calls into intel_atomic_commit_tail()
Changes since v4:
- Use is_power_of_2 against active_crtcs to check whether we have > 1
pipe enabled
- Fix skl_sagv_get_hw_state(): (temp & 0x1) indicates disabled, 0x0
enabled
- Call skl_sagv_enable/disable() from pre/post-plane updates
Changes since v3:
- Use time_before() to compare timeout to jiffies
Changes since v2:
- Really apply minor style nitpicks to patch this time
Changes since v1:
- Added comments about this probably being one of the requirements to
fixing Skylake's watermark issues
- Minor style nitpicks from Matt Roper
- Disable these functions on Broxton, since it doesn't have an SAGV
Signed-off-by: Lyude <cpaul@redhat.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1471463761-26796-3-git-send-email-cpaul@redhat.com
[mlankhorst: ENOSYS -> ENXIO, whitespace fixes]
2016-08-18 02:55:54 +07:00
|
|
|
enum {
|
2016-09-23 04:00:27 +07:00
|
|
|
I915_SAGV_UNKNOWN = 0,
|
|
|
|
I915_SAGV_DISABLED,
|
|
|
|
I915_SAGV_ENABLED,
|
|
|
|
I915_SAGV_NOT_CONTROLLED
|
|
|
|
} sagv_status;
|
drm/i915/skl: Add support for the SAGV, fix underrun hangs
Since the watermark calculations for Skylake are still broken, we're apt
to hitting underruns very easily under multi-monitor configurations.
While it would be lovely if this was fixed, it's not. Another problem
that's been coming from this however, is the mysterious issue of
underruns causing full system hangs. An easy way to reproduce this with
a skylake system:
- Get a laptop with a skylake GPU, and hook up two external monitors to
it
- Move the cursor from the built-in LCD to one of the external displays
as quickly as you can
- You'll get a few pipe underruns, and eventually the entire system will
just freeze.
After doing a lot of investigation and reading through the bspec, I
found the existence of the SAGV, which is responsible for adjusting the
system agent voltage and clock frequencies depending on how much power
we need. According to the bspec:
"The display engine access to system memory is blocked during the
adjustment time. SAGV defaults to enabled. Software must use the
GT-driver pcode mailbox to disable SAGV when the display engine is not
able to tolerate the blocking time."
The rest of the bspec goes on to explain that software can simply leave
the SAGV enabled, and disable it when we use interlaced pipes/have more
than one pipe active.
Sure enough, with this patchset the system hangs resulting from pipe
underruns on Skylake have completely vanished on my T460s. Additionally,
the bspec mentions turning off the SAGV with more than one pipe enabled
as a workaround for display underruns. While this patch doesn't entirely
fix that, it looks like it does improve the situation a little bit so
it's likely this is going to be required to make watermarks on Skylake
fully functional.
This will still need additional work in the future: we shouldn't be
enabling the SAGV if any of the currently enabled planes can't enable WM
levels that introduce latencies >= 30 µs.
Changes since v11:
- Add skl_can_enable_sagv()
- Make sure we don't enable SAGV when not all planes can enable
watermarks >= the SAGV engine block time. I was originally going to
save this for later, but I recently managed to run into a machine
that was having problems with a single pipe configuration + SAGV.
- Make comparisons to I915_SKL_SAGV_NOT_CONTROLLED explicit
- Change I915_SAGV_DYNAMIC_FREQ to I915_SAGV_ENABLE
- Move printks outside of mutexes
- Don't print error messages twice
Changes since v10:
- Apparently sandybridge_pcode_read actually writes values and reads
them back, despite its misleading function name. This means we've
been doing this mostly wrong and have been writing garbage to the
SAGV control. Because of this, we no longer attempt to read the SAGV
status during initialization (since there are no helpers for this).
- mlankhorst noticed that this patch was breaking on some very early
pre-release Skylake machines, which apparently don't allow you to
disable the SAGV. To prevent machines from failing tests due to SAGV
errors, if the first time we try to control the SAGV results in the
mailbox indicating an invalid command, we just disable future attempts
to control the SAGV state by setting dev_priv->skl_sagv_status to
I915_SKL_SAGV_NOT_CONTROLLED and make a note of it in dmesg.
- Move mutex_unlock() a little higher in skl_enable_sagv(). This
doesn't actually fix anything, but lets us release the lock a little
sooner since we're finished with it.
Changes since v9:
- Only enable/disable sagv on Skylake
Changes since v8:
- Add intel_state->modeset guard to the conditional for
skl_enable_sagv()
Changes since v7:
- Remove GEN9_SAGV_LOW_FREQ, replace with GEN9_SAGV_IS_ENABLED (that's
all we use it for anyway)
- Use GEN9_SAGV_IS_ENABLED instead of 0x1 for clarification
- Fix a styling error that snuck past me
Changes since v6:
- Protect skl_enable_sagv() with intel_state->modeset conditional in
intel_atomic_commit_tail()
Changes since v5:
- Don't use is_power_of_2. Makes things confusing
- Don't use the old state to figure out whether or not to
enable/disable the sagv, use the new one
- Split the loop in skl_disable_sagv into its own function
- Move skl_sagv_enable/disable() calls into intel_atomic_commit_tail()
Changes since v4:
- Use is_power_of_2 against active_crtcs to check whether we have > 1
pipe enabled
- Fix skl_sagv_get_hw_state(): (temp & 0x1) indicates disabled, 0x0
enabled
- Call skl_sagv_enable/disable() from pre/post-plane updates
Changes since v3:
- Use time_before() to compare timeout to jiffies
Changes since v2:
- Really apply minor style nitpicks to patch this time
Changes since v1:
- Added comments about this probably being one of the requirements to
fixing Skylake's watermark issues
- Minor style nitpicks from Matt Roper
- Disable these functions on Broxton, since it doesn't have an SAGV
Signed-off-by: Lyude <cpaul@redhat.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1471463761-26796-3-git-send-email-cpaul@redhat.com
[mlankhorst: ENOSYS -> ENXIO, whitespace fixes]
2016-08-18 02:55:54 +07:00
|
|
|
|
2013-08-01 20:18:50 +07:00
|
|
|
struct {
|
|
|
|
/*
|
|
|
|
* Raw watermark latency values:
|
|
|
|
* in 0.1us units for WM0,
|
|
|
|
* in 0.5us units for WM1+.
|
|
|
|
*/
|
|
|
|
/* primary */
|
|
|
|
uint16_t pri_latency[5];
|
|
|
|
/* sprite */
|
|
|
|
uint16_t spr_latency[5];
|
|
|
|
/* cursor */
|
|
|
|
uint16_t cur_latency[5];
|
2014-11-05 00:06:38 +07:00
|
|
|
/*
|
|
|
|
* Raw watermark memory latency values
|
|
|
|
* for SKL for all 8 levels
|
|
|
|
* in 1us units.
|
|
|
|
*/
|
|
|
|
uint16_t skl_latency[8];
|
2013-10-09 23:18:03 +07:00
|
|
|
|
|
|
|
/* current hardware state */
|
2014-11-05 00:06:42 +07:00
|
|
|
union {
|
|
|
|
struct ilk_wm_values hw;
|
2018-04-09 10:41:00 +07:00
|
|
|
struct skl_ddb_values skl_hw;
|
2015-03-06 02:19:45 +07:00
|
|
|
struct vlv_wm_values vlv;
|
2017-04-22 01:14:29 +07:00
|
|
|
struct g4x_wm_values g4x;
|
2014-11-05 00:06:42 +07:00
|
|
|
};
|
2015-09-09 01:05:12 +07:00
|
|
|
|
|
|
|
uint8_t max_level;
|
drm/i915: Add two-stage ILK-style watermark programming (v11)
In addition to calculating final watermarks, let's also pre-calculate a
set of intermediate watermark values at atomic check time. These
intermediate watermarks are a combination of the watermarks for the old
state and the new state; they should satisfy the requirements of both
states which means they can be programmed immediately when we commit the
atomic state (without waiting for a vblank). Once the vblank does
happen, we can then re-program watermarks to the more optimal final
value.
v2: Significant rebasing/rewriting.
v3:
- Move 'need_postvbl_update' flag to CRTC state (Daniel)
- Don't forget to check intermediate watermark values for validity
(Maarten)
- Don't do async watermark optimization; just do it at the end of the
atomic transaction, after waiting for vblanks. We do want it to be
async eventually, but adding that now will cause more trouble for
Maarten's in-progress work. (Maarten)
- Don't allocate space in crtc_state for intermediate watermarks on
platforms that don't need it (gen9+).
- Move WaCxSRDisabledForSpriteScaling:ivb into intel_begin_crtc_commit
now that ilk_update_wm is gone.
v4:
- Add a wm_mutex to cover updates to intel_crtc->active and the
need_postvbl_update flag. Since we don't have async yet it isn't
terribly important yet, but might as well add it now.
- Change interface to program watermarks. Platforms will now expose
.initial_watermarks() and .optimize_watermarks() functions to do
watermark programming. These should lock wm_mutex, copy the
appropriate state values into intel_crtc->active, and then call
the internal program watermarks function.
v5:
- Skip intermediate watermark calculation/check during initial hardware
readout since we don't trust the existing HW values (and don't have
valid values of our own yet).
- Don't try to call .optimize_watermarks() on platforms that don't have
atomic watermarks yet. (Maarten)
v6:
- Rebase
v7:
- Further rebase
v8:
- A few minor indentation and line length fixes
v9:
- Yet another rebase since Maarten's patches reworked a bunch of the
code (wm_pre, wm_post, etc.) that this was previously based on.
v10:
- Move wm_mutex to dev_priv to protect against racing commits against
disjoint CRTC sets. (Maarten)
- Drop unnecessary clearing of cstate->wm.need_postvbl_update (Maarten)
v11:
- Now that we've moved to atomic watermark updates, make sure we call
the proper function to program watermarks in
{ironlake,haswell}_crtc_enable(); the failure to do so on the
previous patch iteration led to us not actually programming the
watermarks before turning on the CRTC, which was the cause of the
underruns that the CI system was seeing.
- Fix inverted logic for determining when to optimize watermarks. We
were needlessly optimizing when the intermediate/optimal values were
the same (harmless), but not actually optimizing when they differed
(also harmless, but wasteful from a power/bandwidth perspective).
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1456276813-5689-1-git-send-email-matthew.d.roper@intel.com
2016-02-24 08:20:13 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Should be held around atomic WM register writing; also
|
|
|
|
* protects intel_crtc->wm.active and
|
|
|
|
* cstate->wm.need_postvbl_update.
|
|
|
|
*/
|
|
|
|
struct mutex wm_mutex;
|
2016-05-12 21:06:02 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Set during HW readout of watermarks/DDB. Some platforms
|
|
|
|
* need to know when we're still using BIOS-provided values
|
|
|
|
* (which we don't fully trust).
|
|
|
|
*/
|
|
|
|
bool distrust_bios_wm;
|
2013-08-01 20:18:50 +07:00
|
|
|
} wm;
|
|
|
|
|
2018-08-24 16:32:21 +07:00
|
|
|
struct dram_info {
|
|
|
|
bool valid;
|
2018-08-31 18:09:42 +07:00
|
|
|
bool valid_dimm;
|
|
|
|
bool is_16gb_dimm;
|
2018-08-24 16:32:21 +07:00
|
|
|
u8 num_channels;
|
|
|
|
enum dram_rank {
|
|
|
|
I915_DRAM_RANK_INVALID = 0,
|
|
|
|
I915_DRAM_RANK_SINGLE,
|
|
|
|
I915_DRAM_RANK_DUAL
|
|
|
|
} rank;
|
|
|
|
u32 bandwidth_kbps;
|
2018-08-24 16:32:25 +07:00
|
|
|
bool symmetric_memory;
|
2018-08-24 16:32:21 +07:00
|
|
|
} dram_info;
|
|
|
|
|
2017-10-11 04:30:04 +07:00
|
|
|
struct i915_runtime_pm runtime_pm;
|
2013-12-07 05:32:13 +07:00
|
|
|
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
struct {
|
|
|
|
bool initialized;
|
2016-11-08 02:49:52 +07:00
|
|
|
|
2016-11-08 02:49:53 +07:00
|
|
|
struct kobject *metrics_kobj;
|
2016-11-08 02:49:54 +07:00
|
|
|
struct ctl_table_header *sysctl_header;
|
2016-11-08 02:49:53 +07:00
|
|
|
|
2017-08-04 00:05:50 +07:00
|
|
|
/*
|
|
|
|
* Lock associated with adding/modifying/removing OA configs
|
|
|
|
* in dev_priv->perf.metrics_idr.
|
|
|
|
*/
|
|
|
|
struct mutex metrics_lock;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* List of dynamic configurations, you need to hold
|
|
|
|
* dev_priv->perf.metrics_lock to access it.
|
|
|
|
*/
|
|
|
|
struct idr metrics_idr;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Lock associated with anything below within this structure
|
|
|
|
* except exclusive_stream.
|
|
|
|
*/
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
struct mutex lock;
|
|
|
|
struct list_head streams;
|
2016-11-08 02:49:51 +07:00
|
|
|
|
|
|
|
struct {
|
2017-08-04 00:05:50 +07:00
|
|
|
/*
|
|
|
|
* The stream currently using the OA unit. If accessed
|
|
|
|
* outside a syscall associated to its file
|
|
|
|
* descriptor, you need to hold
|
|
|
|
* dev_priv->drm.struct_mutex.
|
|
|
|
*/
|
2016-11-08 02:49:52 +07:00
|
|
|
struct i915_perf_stream *exclusive_stream;
|
|
|
|
|
2018-05-18 04:26:32 +07:00
|
|
|
struct intel_context *pinned_ctx;
|
2016-11-08 02:49:52 +07:00
|
|
|
u32 specific_ctx_id;
|
2018-06-02 18:29:46 +07:00
|
|
|
u32 specific_ctx_id_mask;
|
2016-11-08 02:49:52 +07:00
|
|
|
|
|
|
|
struct hrtimer poll_check_timer;
|
|
|
|
wait_queue_head_t poll_wq;
|
|
|
|
bool pollin;
|
|
|
|
|
2017-05-11 22:43:31 +07:00
|
|
|
/**
|
|
|
|
* For rate limiting any notifications of spurious
|
|
|
|
* invalid OA reports
|
|
|
|
*/
|
|
|
|
struct ratelimit_state spurious_report_rs;
|
|
|
|
|
2016-11-08 02:49:52 +07:00
|
|
|
bool periodic;
|
|
|
|
int period_exponent;
|
|
|
|
|
2017-08-03 23:58:08 +07:00
|
|
|
struct i915_oa_config test_config;
|
2016-11-08 02:49:52 +07:00
|
|
|
|
|
|
|
struct {
|
|
|
|
struct i915_vma *vma;
|
|
|
|
u8 *vaddr;
|
2017-06-13 18:23:03 +07:00
|
|
|
u32 last_ctx_id;
|
2016-11-08 02:49:52 +07:00
|
|
|
int format;
|
|
|
|
int format_size;
|
2017-05-11 22:43:26 +07:00
|
|
|
|
2017-05-11 22:43:28 +07:00
|
|
|
/**
|
|
|
|
* Locks reads and writes to all head/tail state
|
|
|
|
*
|
|
|
|
* Consider: the head and tail pointer state
|
|
|
|
* needs to be read consistently from a hrtimer
|
|
|
|
* callback (atomic context) and read() fop
|
|
|
|
* (user context) with tail pointer updates
|
|
|
|
* happening in atomic context and head updates
|
|
|
|
* in user context and the (unlikely)
|
|
|
|
* possibility of read() errors needing to
|
|
|
|
* reset all head/tail state.
|
|
|
|
*
|
|
|
|
* Note: Contention or performance aren't
|
|
|
|
* currently a significant concern here
|
|
|
|
* considering the relatively low frequency of
|
|
|
|
* hrtimer callbacks (5ms period) and that
|
|
|
|
* reads typically only happen in response to a
|
|
|
|
* hrtimer event and likely complete before the
|
|
|
|
* next callback.
|
|
|
|
*
|
|
|
|
* Note: This lock is not held *while* reading
|
|
|
|
* and copying data to userspace so the value
|
|
|
|
* of head observed in hrtimer callbacks won't
|
|
|
|
* represent any partial consumption of data.
|
|
|
|
*/
|
|
|
|
spinlock_t ptr_lock;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* One 'aging' tail pointer and one 'aged'
|
|
|
|
* tail pointer ready to be used for reading.
|
|
|
|
*
|
|
|
|
* Initial values of 0xffffffff are invalid
|
|
|
|
* and imply that an update is required
|
|
|
|
* (and should be ignored by an attempted
|
|
|
|
* read)
|
|
|
|
*/
|
|
|
|
struct {
|
|
|
|
u32 offset;
|
|
|
|
} tails[2];
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Index for the aged tail ready to read()
|
|
|
|
* data up to.
|
|
|
|
*/
|
|
|
|
unsigned int aged_tail_idx;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* A monotonic timestamp for when the current
|
|
|
|
* aging tail pointer was read; used to
|
|
|
|
* determine when it is old enough to trust.
|
|
|
|
*/
|
|
|
|
u64 aging_timestamp;
|
|
|
|
|
2017-05-11 22:43:26 +07:00
|
|
|
/**
|
|
|
|
* Although we can always read back the head
|
|
|
|
* pointer register, we prefer to avoid
|
|
|
|
* trusting the HW state, just to avoid any
|
|
|
|
* risk that some hardware condition could
|
|
|
|
* somehow bump the head pointer unpredictably
|
|
|
|
* and cause us to forward the wrong OA buffer
|
|
|
|
* data to userspace.
|
|
|
|
*/
|
|
|
|
u32 head;
|
2016-11-08 02:49:52 +07:00
|
|
|
} oa_buffer;
|
|
|
|
|
|
|
|
u32 gen7_latched_oastatus1;
|
2017-06-13 18:23:03 +07:00
|
|
|
u32 ctx_oactxctrl_offset;
|
|
|
|
u32 ctx_flexeu0_offset;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The RPT_ID/reason field for Gen8+ includes a bit
|
|
|
|
* to determine if the CTX ID in the report is valid
|
|
|
|
* but the specific bit differs between Gen 8 and 9
|
|
|
|
*/
|
|
|
|
u32 gen8_valid_ctx_bit;
|
2016-11-08 02:49:52 +07:00
|
|
|
|
|
|
|
struct i915_oa_ops ops;
|
|
|
|
const struct i915_oa_format *oa_formats;
|
2016-11-08 02:49:51 +07:00
|
|
|
} oa;
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
} perf;
|
|
|
|
|
2014-07-24 23:04:21 +07:00
|
|
|
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
|
|
|
|
struct {
|
2016-09-09 20:11:53 +07:00
|
|
|
void (*resume)(struct drm_i915_private *);
|
2016-03-16 18:00:40 +07:00
|
|
|
void (*cleanup_engine)(struct intel_engine_cs *engine);
|
2016-07-04 14:08:31 +07:00
|
|
|
|
2018-04-30 20:15:02 +07:00
|
|
|
struct list_head timelines;
|
2018-04-30 20:15:03 +07:00
|
|
|
|
|
|
|
struct list_head active_rings;
|
2018-05-04 02:51:14 +07:00
|
|
|
struct list_head closed_vma;
|
2016-10-28 19:58:56 +07:00
|
|
|
u32 active_requests;
|
2018-04-30 20:15:00 +07:00
|
|
|
u32 request_serial;
|
2016-10-28 19:58:46 +07:00
|
|
|
|
2016-07-04 14:08:31 +07:00
|
|
|
/**
|
|
|
|
* Is the GPU currently considered idle, or busy executing
|
|
|
|
* userspace requests? Whilst idle, we allow runtime power
|
|
|
|
* management to power down the hardware and display clocks.
|
|
|
|
* In order to reduce the effect on performance, there
|
|
|
|
* is a slight delay before we do so.
|
|
|
|
*/
|
|
|
|
bool awake;
|
|
|
|
|
2018-01-24 18:36:07 +07:00
|
|
|
/**
|
|
|
|
* The number of times we have woken up.
|
|
|
|
*/
|
|
|
|
unsigned int epoch;
|
|
|
|
#define I915_EPOCH_INVALID 0
|
|
|
|
|
2016-07-04 14:08:31 +07:00
|
|
|
/**
|
|
|
|
* We leave the user IRQ off as much as possible,
|
|
|
|
* but this means that requests will finish and never
|
|
|
|
* be retired once the system goes idle. Set a timer to
|
|
|
|
* fire periodically while the ring is running. When it
|
|
|
|
* fires, go retire requests.
|
|
|
|
*/
|
|
|
|
struct delayed_work retire_work;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* When we detect an idle GPU, we want to turn on
|
|
|
|
* powersaving features. So once we see that there
|
|
|
|
* are no more requests outstanding and no more
|
|
|
|
* arrive within a small period of time, we fire
|
|
|
|
* off the idle_work.
|
|
|
|
*/
|
|
|
|
struct delayed_work idle_work;
|
2016-10-25 19:16:02 +07:00
|
|
|
|
|
|
|
ktime_t last_init_time;
|
2014-07-24 23:04:21 +07:00
|
|
|
} gt;
|
|
|
|
|
2015-09-08 22:05:45 +07:00
|
|
|
/* perform PHY state sanity checks? */
|
|
|
|
bool chv_phy_assert[2];
|
|
|
|
|
2016-12-01 22:49:34 +07:00
|
|
|
bool ipc_enabled;
|
|
|
|
|
drm/i915/dp: DP audio API changes for MST
DP MST provides the capability to send multiple video and audio streams
through a single port. This requires the API's between i915 and audio
drivers to distinguish between multiple audio capable displays that can be
connected to a port. Currently only the port identity is shared in the
APIs. This patch adds support for MST with an additional parameter
'int pipe'. The existing parameter 'port' does not change its meaning.
pipe =
MST : display pipe that the stream originates from
Non-MST : -1
Affected APIs:
struct i915_audio_component_ops
- int (*sync_audio_rate)(struct device *, int port, int rate);
+ int (*sync_audio_rate)(struct device *, int port, int pipe,
+ int rate);
- int (*get_eld)(struct device *, int port, bool *enabled,
- unsigned char *buf, int max_bytes);
+ int (*get_eld)(struct device *, int port, int pipe,
+ bool *enabled, unsigned char *buf, int max_bytes);
struct i915_audio_component_audio_ops
- void (*pin_eld_notify)(void *audio_ptr, int port);
+ void (*pin_eld_notify)(void *audio_ptr, int port, int pipe);
This patch makes dummy changes in the audio drivers (thanks Libin) for
build to succeed. The audio side drivers will send the right 'pipe' values
for MST in patches that will follow.
v2:
Renamed the new API parameter from 'dev_id' to 'pipe'. (Jim, Ville)
Included Asoc driver API compatibility changes from Jeeja.
Added WARN_ON() for invalid pipe in get_saved_encoder(). (Takashi)
Added comment for av_enc_map[] definition. (Takashi)
v3:
Fixed logic error introduced while renaming 'dev_id' as 'pipe' (Ville)
Renamed get_saved_encoder() to get_saved_enc() to reduce line length
v4:
Rebased.
Parameter check for pipe < -1 values in get_saved_enc() (Ville)
Switched to for_each_pipe() in get_saved_enc() (Ville)
Renamed 'pipe' to 'dev_id' in audio side code (Takashi)
v5:
Included a comment for the dev_id arg. (Libin)
Signed-off-by: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1474488168-2343-1-git-send-email-dhinakaran.pandiyan@intel.com
2016-09-22 03:02:48 +07:00
|
|
|
/* Used to save the pipe-to-encoder mapping for audio */
|
|
|
|
struct intel_encoder *av_enc_map[I915_MAX_PIPES];
|
2015-12-01 00:19:39 +07:00
|
|
|
|
2017-01-25 05:57:49 +07:00
|
|
|
/* necessary resource sharing with HDMI LPE audio driver. */
|
|
|
|
struct {
|
|
|
|
struct platform_device *platdev;
|
|
|
|
int irq;
|
|
|
|
} lpe_audio;
|
|
|
|
|
drm/i915/pmu: Expose a PMU interface for perf queries
From: Chris Wilson <chris@chris-wilson.co.uk>
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
From: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
The first goal is to be able to measure GPU (and individual ring) busyness
without having to poll registers from userspace. (Which not only incurs
holding the forcewake lock indefinitely, perturbing the system, but also
runs the risk of hanging the machine.) As an alternative we can use the
perf event counter interface to sample the ring registers periodically
and send those results to userspace.
Functionality we are exporting to userspace is via the existing perf PMU
API and can be exercised via the existing tools. For example:
perf stat -a -e i915/rcs0-busy/ -I 1000
Will print the render engine busyness once per second. All the performance
counters can be enumerated (perf list) and have their unit of measure
correctly reported in sysfs.
v1-v2 (Chris Wilson):
v2: Use a common timer for the ring sampling.
v3: (Tvrtko Ursulin)
* Decouple uAPI from i915 engine ids.
* Complete uAPI defines.
* Refactor some code to helpers for clarity.
* Skip sampling disabled engines.
* Expose counters in sysfs.
* Pass in fake regs to avoid null ptr deref in perf core.
* Convert to class/instance uAPI.
* Use shared driver code for rc6 residency, power and frequency.
v4: (Dmitry Rogozhkin)
* Register PMU with .task_ctx_nr=perf_invalid_context
* Expose cpumask for the PMU with the single CPU in the mask
* Properly support pmu->stop(): it should call pmu->read()
* Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE)
* Introduce refcounting of event subscriptions.
* Make pmu.busy_stats a refcounter to avoid busy stats going away
with some deleted event.
* Expose cpumask for i915 PMU to avoid multiple events creation of
the same type followed by counter aggregation by perf-stat.
* Track CPUs getting online/offline to migrate perf context. If (likely)
cpumask will initially set CPU0, CONFIG_BOOTPARAM_HOTPLUG_CPU0 will be
needed to see effect of CPU status tracking.
* End result is that only global events are supported and perf stat
works correctly.
* Deny perf driver level sampling - it is prohibited for uncore PMU.
v5: (Tvrtko Ursulin)
* Don't hardcode number of engine samplers.
* Rewrite event ref-counting for correctness and simplicity.
* Store initial counter value when starting already enabled events
to correctly report values to all listeners.
* Fix RC6 residency readout.
* Comments, GPL header.
v6:
* Add missing entry to v4 changelog.
* Fix accounting in CPU hotplug case by copying the approach from
arch/x86/events/intel/cstate.c. (Dmitry Rogozhkin)
v7:
* Log failure message only on failure.
* Remove CPU hotplug notification state on unregister.
v8:
* Fix error unwind on failed registration.
* Checkpatch cleanup.
v9:
* Drop the energy metric, it is available via intel_rapl_perf.
(Ville Syrjälä)
* Use HAS_RC6(p). (Chris Wilson)
* Handle unsupported non-engine events. (Dmitry Rogozhkin)
* Rebase for intel_rc6_residency_ns needing caller managed
runtime pm.
* Drop HAS_RC6 checks from the read callback since creating those
events will be rejected at init time already.
* Add counter units to sysfs so perf stat output is nicer.
* Cleanup the attribute tables for brevity and readability.
v10:
* Fixed queued accounting.
v11:
* Move intel_engine_lookup_user to intel_engine_cs.c
* Commit update. (Joonas Lahtinen)
v12:
* More accurate sampling. (Chris Wilson)
* Store and report frequency in MHz for better usability from
perf stat.
* Removed metrics: queued, interrupts, rc6 counters.
* Sample engine busyness based on seqno difference only
for less MMIO (and forcewake) on all platforms. (Chris Wilson)
v13:
* Comment spelling, use mul_u32_u32 to work around potential GCC
issue and some code alignment changes. (Chris Wilson)
v14:
* Rebase.
v15:
* Rebase for RPS refactoring.
v16:
* Use the dynamic slot in the CPU hotplug state machine so that we are
free to setup our state as multi-instance. Previously we were re-using
the CPUHP_AP_PERF_X86_UNCORE_ONLINE slot which is neither used as
multi-instance, nor owned by our driver to start with.
* Register the CPU hotplug handlers after the PMU, otherwise the callback
will get called before the PMU is initialized which can end up in
perf_pmu_migrate_context with an un-initialized base.
* Added workaround for a probable bug in cpuhp core.
v17:
* Remove workaround for the cpuhp bug.
v18:
* Rebase for drm_i915_gem_engine_class getting upstream before us.
v19:
* Rebase. (trivial)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171121181852.16128-2-tvrtko.ursulin@linux.intel.com
2017-11-22 01:18:45 +07:00
|
|
|
struct i915_pmu pmu;
|
|
|
|
|
2014-05-21 22:37:52 +07:00
|
|
|
/*
|
|
|
|
* NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
|
|
|
|
* will be rejected. Instead look for a better place.
|
|
|
|
*/
|
2014-03-31 18:27:22 +07:00
|
|
|
};
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2018-08-24 16:32:22 +07:00
|
|
|
struct dram_channel_info {
|
|
|
|
struct info {
|
|
|
|
u8 size, width;
|
|
|
|
enum dram_rank rank;
|
|
|
|
} l_info, s_info;
|
|
|
|
enum dram_rank rank;
|
2018-08-31 18:09:42 +07:00
|
|
|
bool is_16gb_dimm;
|
2018-08-24 16:32:22 +07:00
|
|
|
};
|
|
|
|
|
2013-08-02 00:39:55 +07:00
|
|
|
static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
|
|
|
|
{
|
2016-06-24 20:00:21 +07:00
|
|
|
return container_of(dev, struct drm_i915_private, drm);
|
2013-08-02 00:39:55 +07:00
|
|
|
}
|
|
|
|
|
2016-08-22 17:32:42 +07:00
|
|
|
/*
 * kdev_to_i915 - map a struct device to the i915 private structure.
 *
 * The driver data stored on @kdev is passed straight to to_i915(), i.e. it
 * is treated as a pointer to the drm_device embedded in drm_i915_private.
 */
static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
{
	return to_i915(dev_get_drvdata(kdev));
}
|
|
|
|
|
2018-03-14 07:32:50 +07:00
|
|
|
static inline struct drm_i915_private *wopcm_to_i915(struct intel_wopcm *wopcm)
|
|
|
|
{
|
|
|
|
return container_of(wopcm, struct drm_i915_private, wopcm);
|
|
|
|
}
|
|
|
|
|
2015-08-12 21:43:36 +07:00
|
|
|
static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
|
|
|
|
{
|
|
|
|
return container_of(guc, struct drm_i915_private, guc);
|
|
|
|
}
|
|
|
|
|
2017-03-14 21:28:06 +07:00
|
|
|
static inline struct drm_i915_private *huc_to_i915(struct intel_huc *huc)
|
|
|
|
{
|
|
|
|
return container_of(huc, struct drm_i915_private, huc);
|
|
|
|
}
|
|
|
|
|
2016-03-24 18:20:38 +07:00
|
|
|
/* Simple iterator over all initialised engines */
|
drm/i915: Allocate intel_engine_cs structure only for the enabled engines
With the possibility of addition of many more number of rings in future,
the drm_i915_private structure could bloat as an array, of type
intel_engine_cs, is embedded inside it.
struct intel_engine_cs engine[I915_NUM_ENGINES];
Though this is still fine as generally there is only a single instance of
drm_i915_private structure used, but not all of the possible rings would be
enabled or active on most of the platforms. Some memory can be saved by
allocating intel_engine_cs structure only for the enabled/active engines.
Currently the engine/ring ID is kept static and dev_priv->engine[] is simply
indexed using the enums defined in intel_engine_id.
To save memory and continue using the static engine/ring IDs, 'engine' is
defined as an array of pointers.
struct intel_engine_cs *engine[I915_NUM_ENGINES];
dev_priv->engine[engine_ID] will be NULL for disabled engine instances.
There is a text size reduction of 928 bytes, from 1028200 to 1027272, for
i915.o file (but for i915.ko file text size remain same as 1193131 bytes).
v2:
- Remove the engine iterator field added in drm_i915_private structure,
instead pass a local iterator variable to the for_each_engine**
macros. (Chris)
- Do away with intel_engine_initialized() and instead directly use the
NULL pointer check on engine pointer. (Chris)
v3:
- Remove for_each_engine_id() macro, as the updated macro for_each_engine()
can be used in place of it. (Chris)
- Protect the access to Render engine Fault register with a NULL check, as
engine specific init is done later in Driver load sequence.
v4:
- Use !!dev_priv->engine[VCS] style for the engine check in getparam. (Chris)
- Kill the superfluous init_engine_lists().
v5:
- Cleanup the intel_engines_init() & intel_engines_setup(), with respect to
allocation of intel_engine_cs structure. (Chris)
v6:
- Rebase.
v7:
- Optimize the for_each_engine_masked() macro. (Chris)
- Change the type of 'iter' local variable to enum intel_engine_id. (Chris)
- Rebase.
v8: Rebase.
v9: Rebase.
v10:
- For index calculation use engine ID instead of pointer based arithmetic in
intel_engine_sync_index() as engine pointers are not contiguous now (Chris)
- For appropriateness, rename local enum variable 'iter' to 'id'. (Joonas)
- Use for_each_engine macro for cleanup in intel_engines_init() and remove
check for NULL engine pointer in cleanup() routines. (Joonas)
v11: Rebase.
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1476378888-7372-1-git-send-email-akash.goel@intel.com
2016-10-14 00:14:48 +07:00
|
|
|
/*
 * for_each_engine - walk the slots of (dev_priv__)->engine[]
 * @engine__:	receives the pointer stored in the current slot
 * @dev_priv__:	pointer to the structure holding the engine[] array
 * @id__:	index variable, runs from 0 up to I915_NUM_ENGINES - 1
 *
 * Every index is visited, but the loop body is guarded by for_each_if() on
 * the pointer just assigned to @engine__, so slots whose pointer is NULL are
 * skipped. After the loop @id__ equals I915_NUM_ENGINES.
 */
#define for_each_engine(engine__, dev_priv__, id__) \
	for ((id__) = 0; \
	     (id__) < I915_NUM_ENGINES; \
	     (id__)++) \
		for_each_if ((engine__) = (dev_priv__)->engine[(id__)])
|
2016-03-24 01:19:53 +07:00
|
|
|
|
|
|
|
/* Iterator over subset of engines selected by mask */
|
2016-08-27 14:54:01 +07:00
|
|
|
/*
 * for_each_engine_masked - walk the engines selected by a bit mask
 * @engine__:	receives (dev_priv__)->engine[] entry for each selected bit
 * @dev_priv__:	pointer to the i915 private structure
 * @mask__:	requested engine bits
 * @tmp__:	scratch variable holding the bits still to be visited
 *
 * The requested mask is first ANDed with INTEL_INFO(dev_priv__)->ring_mask.
 * On each pass __mask_next_bit() supplies the index used to look up the
 * engine; the loop ends once @tmp__ becomes zero. The for-statement has no
 * increment clause: all progress comes from __mask_next_bit(@tmp__).
 */
#define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \
	for ((tmp__) = (mask__) & INTEL_INFO(dev_priv__)->ring_mask; \
	     (tmp__) ? \
	     ((engine__) = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : \
	     0;)
|
2016-03-16 22:54:00 +07:00
|
|
|
|
2012-02-14 10:45:36 +07:00
|
|
|
/*
 * HDMI audio override settings. The first enumerator is pinned to -2 and
 * the rest follow sequentially, so the values are -2, -1, 0 and 1.
 */
enum hdmi_force_audio {
	HDMI_AUDIO_OFF_DVI = -2,	/* no aux data for HDMI-DVI converter */
	HDMI_AUDIO_OFF,			/* force turn off HDMI audio */
	HDMI_AUDIO_AUTO,		/* trust EDID */
	HDMI_AUDIO_ON,			/* force turn on HDMI audio */
};
|
|
|
|
|
2013-07-04 18:06:28 +07:00
|
|
|
#define I915_GTT_OFFSET_NONE ((u32)-1)
|
2012-11-15 18:32:19 +07:00
|
|
|
|
2014-06-19 04:28:09 +07:00
|
|
|
/*
|
|
|
|
* Frontbuffer tracking bits. Set in obj->frontbuffer_bits while a gem bo is
|
2015-09-14 23:05:42 +07:00
|
|
|
* considered to be the frontbuffer for the given plane interface-wise. This
|
2014-06-19 04:28:09 +07:00
|
|
|
* doesn't mean that the hw necessarily already scans it out, but that any
|
|
|
|
* rendering (by the cpu or gpu) will land in the frontbuffer eventually.
|
|
|
|
*
|
|
|
|
* We have one bit per pipe and per scanout plane type.
|
|
|
|
*/
|
2015-09-14 23:05:42 +07:00
|
|
|
/* Number of frontbuffer bits reserved for each pipe. */
#define INTEL_FRONTBUFFER_BITS_PER_PIPE 8
/*
 * Bit for plane @plane_id on @pipe. The build-time checks ensure that all
 * pipes fit in 32 bits and that I915_MAX_PLANES fits in one pipe's share.
 */
#define INTEL_FRONTBUFFER(pipe, plane_id) ({ \
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES > 32); \
	BUILD_BUG_ON(I915_MAX_PLANES > INTEL_FRONTBUFFER_BITS_PER_PIPE); \
	BIT((plane_id) + INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)); \
})
/* Overlay bit for @pipe: the highest bit of that pipe's group. */
#define INTEL_FRONTBUFFER_OVERLAY(pipe) \
	BIT(INTEL_FRONTBUFFER_BITS_PER_PIPE - 1 + INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))
/* Mask covering every frontbuffer bit that belongs to @pipe. */
#define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
	GENMASK(INTEL_FRONTBUFFER_BITS_PER_PIPE * ((pipe) + 1) - 1, \
		INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))
|
2014-06-19 04:28:09 +07:00
|
|
|
|
2016-05-20 17:54:06 +07:00
|
|
|
/*
|
|
|
|
* Optimised SGL iterator for GEM objects
|
|
|
|
*/
|
|
|
|
static __always_inline struct sgt_iter {
|
|
|
|
struct scatterlist *sgp;
|
|
|
|
union {
|
|
|
|
unsigned long pfn;
|
|
|
|
dma_addr_t dma;
|
|
|
|
};
|
|
|
|
unsigned int curr;
|
|
|
|
unsigned int max;
|
|
|
|
} __sgt_iter(struct scatterlist *sgl, bool dma) {
|
|
|
|
struct sgt_iter s = { .sgp = sgl };
|
|
|
|
|
|
|
|
if (s.sgp) {
|
|
|
|
s.max = s.curr = s.sgp->offset;
|
|
|
|
s.max += s.sgp->length;
|
|
|
|
if (dma)
|
|
|
|
s.dma = sg_dma_address(s.sgp);
|
|
|
|
else
|
|
|
|
s.pfn = page_to_pfn(sg_page(s.sgp));
|
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
2016-10-28 19:58:33 +07:00
|
|
|
static inline struct scatterlist *____sg_next(struct scatterlist *sg)
|
|
|
|
{
|
|
|
|
++sg;
|
|
|
|
if (unlikely(sg_is_chain(sg)))
|
|
|
|
sg = sg_chain_ptr(sg);
|
|
|
|
return sg;
|
|
|
|
}
|
|
|
|
|
2016-05-20 17:54:07 +07:00
|
|
|
/**
 * __sg_next - return the next scatterlist entry in a list
 * @sg:		The current sg entry
 *
 * Description:
 *   If the entry is the last, return NULL; otherwise, step to the next
 *   element in the array (@sg + 1). If that element is a chain pointer,
 *   follow it and return the entry it points to; otherwise return that
 *   next element.
 */
static inline struct scatterlist *__sg_next(struct scatterlist *sg)
{
	return sg_is_last(sg) ? NULL : ____sg_next(sg);
}
|
|
|
|
|
2016-05-20 17:54:06 +07:00
|
|
|
/**
 * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
 * @__dmap:	DMA address (output)
 * @__iter:	'struct sgt_iter' (iterator state, internal)
 * @__sgt:	sg_table to iterate over (input)
 *
 * Each pass yields (__iter).dma + (__iter).curr and then advances curr by
 * I915_GTT_PAGE_SIZE; once curr reaches max the iterator is rebuilt from
 * the next scatterlist entry. The loop stops when the computed address is
 * zero, which is what a zeroed iterator (no further entry) produces.
 */
#define for_each_sgt_dma(__dmap, __iter, __sgt) \
	for ((__iter) = __sgt_iter((__sgt)->sgl, true); \
	     ((__dmap) = (__iter).dma + (__iter).curr); \
	     (((__iter).curr += I915_GTT_PAGE_SIZE) >= (__iter).max) ? \
	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0)
|
2016-05-20 17:54:06 +07:00
|
|
|
|
|
|
|
/**
 * for_each_sgt_page - iterate over the pages of the given sg_table
 * @__pp:	page pointer (output)
 * @__iter:	'struct sgt_iter' (iterator state, internal)
 * @__sgt:	sg_table to iterate over (input)
 *
 * Each pass yields the page for (__iter).pfn + (curr >> PAGE_SHIFT) and then
 * advances curr by PAGE_SIZE; once curr reaches max the iterator is rebuilt
 * from the next scatterlist entry. A pfn of zero produces a NULL page
 * pointer, which terminates the loop.
 */
#define for_each_sgt_page(__pp, __iter, __sgt) \
	for ((__iter) = __sgt_iter((__sgt)->sgl, false); \
	     ((__pp) = (__iter).pfn == 0 ? NULL : \
	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ? \
	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0)
|
2014-06-19 04:28:09 +07:00
|
|
|
|
2017-10-07 05:18:18 +07:00
|
|
|
static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg)
|
|
|
|
{
|
|
|
|
unsigned int page_sizes;
|
|
|
|
|
|
|
|
page_sizes = 0;
|
|
|
|
while (sg) {
|
|
|
|
GEM_BUG_ON(sg->offset);
|
|
|
|
GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE));
|
|
|
|
page_sizes |= sg->length;
|
|
|
|
sg = __sg_next(sg);
|
|
|
|
}
|
|
|
|
|
|
|
|
return page_sizes;
|
|
|
|
}
|
|
|
|
|
2017-08-03 16:14:17 +07:00
|
|
|
static inline unsigned int i915_sg_segment_size(void)
|
|
|
|
{
|
|
|
|
unsigned int size = swiotlb_max_segment();
|
|
|
|
|
|
|
|
if (size == 0)
|
|
|
|
return SCATTERLIST_MAX_SEGMENT;
|
|
|
|
|
|
|
|
size = rounddown(size, PAGE_SIZE);
|
|
|
|
/* swiotlb_max_segment_size can return 1 byte when it means one page. */
|
|
|
|
if (size < PAGE_SIZE)
|
|
|
|
size = PAGE_SIZE;
|
|
|
|
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
2016-11-16 15:55:45 +07:00
|
|
|
static inline const struct intel_device_info *
|
|
|
|
intel_info(const struct drm_i915_private *dev_priv)
|
|
|
|
{
|
|
|
|
return &dev_priv->info;
|
|
|
|
}
|
|
|
|
|
|
|
|
#define INTEL_INFO(dev_priv) intel_info((dev_priv))
|
2018-07-06 17:14:41 +07:00
|
|
|
#define DRIVER_CAPS(dev_priv) (&(dev_priv)->caps)
|
2016-10-13 17:02:58 +07:00
|
|
|
|
2016-10-14 15:17:22 +07:00
|
|
|
#define INTEL_GEN(dev_priv) ((dev_priv)->info.gen)
|
2016-10-13 17:02:58 +07:00
|
|
|
#define INTEL_DEVID(dev_priv) ((dev_priv)->info.device_id)
|
2010-11-09 16:17:32 +07:00
|
|
|
|
2015-10-20 19:22:02 +07:00
|
|
|
#define REVID_FOREVER 0xff
|
2016-11-04 21:42:46 +07:00
|
|
|
#define INTEL_REVID(dev_priv) ((dev_priv)->drm.pdev->revision)
|
2016-05-10 16:57:08 +07:00
|
|
|
|
|
|
|
#define GEN_FOREVER (0)
|
2017-09-13 18:52:54 +07:00
|
|
|
|
|
|
|
#define INTEL_GEN_MASK(s, e) ( \
|
|
|
|
BUILD_BUG_ON_ZERO(!__builtin_constant_p(s)) + \
|
|
|
|
BUILD_BUG_ON_ZERO(!__builtin_constant_p(e)) + \
|
|
|
|
GENMASK((e) != GEN_FOREVER ? (e) - 1 : BITS_PER_LONG - 1, \
|
|
|
|
(s) != GEN_FOREVER ? (s) - 1 : 0) \
|
|
|
|
)
|
|
|
|
|
2016-05-10 16:57:08 +07:00
|
|
|
/*
|
|
|
|
* Returns true if Gen is in inclusive range [Start, End].
|
|
|
|
*
|
|
|
|
* Use GEN_FOREVER for unbound start and or end.
|
|
|
|
*/
|
2017-09-13 18:52:54 +07:00
|
|
|
#define IS_GEN(dev_priv, s, e) \
|
|
|
|
(!!((dev_priv)->info.gen_mask & INTEL_GEN_MASK((s), (e))))
|
2016-05-10 16:57:08 +07:00
|
|
|
|
2015-10-20 19:22:02 +07:00
|
|
|
/*
|
|
|
|
* Return true if revision is in range [since,until] inclusive.
|
|
|
|
*
|
|
|
|
* Use 0 for open-ended since, and REVID_FOREVER for open-ended until.
|
|
|
|
*/
|
|
|
|
#define IS_REVID(p, since, until) \
|
|
|
|
(INTEL_REVID(p) >= (since) && INTEL_REVID(p) <= (until))
|
|
|
|
|
2017-09-27 23:41:38 +07:00
|
|
|
/*
 * Test whether bit @p is set in (dev_priv)->info.platform_mask.
 *
 * The result is normalised to 0 or 1 with "!!", matching the IS_GEN*() and
 * HAS_ENGINE() helpers in this header, so it stays truthful even when it is
 * stored in a type narrower than the mask.
 */
#define IS_PLATFORM(dev_priv, p) (!!((dev_priv)->info.platform_mask & BIT(p)))
|
2017-09-20 16:26:59 +07:00
|
|
|
|
|
|
|
#define IS_I830(dev_priv) IS_PLATFORM(dev_priv, INTEL_I830)
|
|
|
|
#define IS_I845G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I845G)
|
|
|
|
#define IS_I85X(dev_priv) IS_PLATFORM(dev_priv, INTEL_I85X)
|
|
|
|
#define IS_I865G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I865G)
|
|
|
|
#define IS_I915G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I915G)
|
|
|
|
#define IS_I915GM(dev_priv) IS_PLATFORM(dev_priv, INTEL_I915GM)
|
|
|
|
#define IS_I945G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I945G)
|
|
|
|
#define IS_I945GM(dev_priv) IS_PLATFORM(dev_priv, INTEL_I945GM)
|
|
|
|
#define IS_I965G(dev_priv) IS_PLATFORM(dev_priv, INTEL_I965G)
|
|
|
|
#define IS_I965GM(dev_priv) IS_PLATFORM(dev_priv, INTEL_I965GM)
|
|
|
|
#define IS_G45(dev_priv) IS_PLATFORM(dev_priv, INTEL_G45)
|
|
|
|
#define IS_GM45(dev_priv) IS_PLATFORM(dev_priv, INTEL_GM45)
|
2016-11-30 22:43:05 +07:00
|
|
|
#define IS_G4X(dev_priv) (IS_G45(dev_priv) || IS_GM45(dev_priv))
|
2016-10-13 17:02:58 +07:00
|
|
|
#define IS_PINEVIEW_G(dev_priv) (INTEL_DEVID(dev_priv) == 0xa001)
|
|
|
|
#define IS_PINEVIEW_M(dev_priv) (INTEL_DEVID(dev_priv) == 0xa011)
|
2017-09-20 16:26:59 +07:00
|
|
|
#define IS_PINEVIEW(dev_priv) IS_PLATFORM(dev_priv, INTEL_PINEVIEW)
|
|
|
|
#define IS_G33(dev_priv) IS_PLATFORM(dev_priv, INTEL_G33)
|
2016-10-13 17:02:58 +07:00
|
|
|
#define IS_IRONLAKE_M(dev_priv) (INTEL_DEVID(dev_priv) == 0x0046)
|
2017-09-20 16:26:59 +07:00
|
|
|
#define IS_IVYBRIDGE(dev_priv) IS_PLATFORM(dev_priv, INTEL_IVYBRIDGE)
|
2017-08-30 23:12:07 +07:00
|
|
|
#define IS_IVB_GT1(dev_priv) (IS_IVYBRIDGE(dev_priv) && \
|
|
|
|
(dev_priv)->info.gt == 1)
|
2017-09-20 16:26:59 +07:00
|
|
|
#define IS_VALLEYVIEW(dev_priv) IS_PLATFORM(dev_priv, INTEL_VALLEYVIEW)
|
|
|
|
#define IS_CHERRYVIEW(dev_priv) IS_PLATFORM(dev_priv, INTEL_CHERRYVIEW)
|
|
|
|
#define IS_HASWELL(dev_priv) IS_PLATFORM(dev_priv, INTEL_HASWELL)
|
|
|
|
#define IS_BROADWELL(dev_priv) IS_PLATFORM(dev_priv, INTEL_BROADWELL)
|
|
|
|
#define IS_SKYLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_SKYLAKE)
|
|
|
|
#define IS_BROXTON(dev_priv) IS_PLATFORM(dev_priv, INTEL_BROXTON)
|
|
|
|
#define IS_KABYLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_KABYLAKE)
|
|
|
|
#define IS_GEMINILAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_GEMINILAKE)
|
|
|
|
#define IS_COFFEELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_COFFEELAKE)
|
|
|
|
#define IS_CANNONLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_CANNONLAKE)
|
2018-01-12 01:00:04 +07:00
|
|
|
#define IS_ICELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ICELAKE)
|
2016-11-01 03:37:14 +07:00
|
|
|
#define IS_MOBILE(dev_priv) ((dev_priv)->info.is_mobile)
|
2016-10-13 17:02:58 +07:00
|
|
|
#define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \
|
|
|
|
(INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00)
|
|
|
|
#define IS_BDW_ULT(dev_priv) (IS_BROADWELL(dev_priv) && \
|
|
|
|
((INTEL_DEVID(dev_priv) & 0xf) == 0x6 || \
|
|
|
|
(INTEL_DEVID(dev_priv) & 0xf) == 0xb || \
|
|
|
|
(INTEL_DEVID(dev_priv) & 0xf) == 0xe))
|
2015-06-03 19:45:12 +07:00
|
|
|
/* ULX machines are also considered ULT. */
|
2016-10-13 17:02:58 +07:00
|
|
|
#define IS_BDW_ULX(dev_priv) (IS_BROADWELL(dev_priv) && \
|
|
|
|
(INTEL_DEVID(dev_priv) & 0xf) == 0xe)
|
|
|
|
#define IS_BDW_GT3(dev_priv) (IS_BROADWELL(dev_priv) && \
|
2017-08-30 23:12:07 +07:00
|
|
|
(dev_priv)->info.gt == 3)
|
2016-10-13 17:02:58 +07:00
|
|
|
#define IS_HSW_ULT(dev_priv) (IS_HASWELL(dev_priv) && \
|
|
|
|
(INTEL_DEVID(dev_priv) & 0xFF00) == 0x0A00)
|
|
|
|
#define IS_HSW_GT3(dev_priv) (IS_HASWELL(dev_priv) && \
|
2017-08-30 23:12:07 +07:00
|
|
|
(dev_priv)->info.gt == 3)
|
2014-04-29 21:00:22 +07:00
|
|
|
/* ULX machines are also considered ULT. */
|
2016-10-13 17:02:58 +07:00
|
|
|
#define IS_HSW_ULX(dev_priv) (INTEL_DEVID(dev_priv) == 0x0A0E || \
|
|
|
|
INTEL_DEVID(dev_priv) == 0x0A1E)
|
|
|
|
#define IS_SKL_ULT(dev_priv) (INTEL_DEVID(dev_priv) == 0x1906 || \
|
|
|
|
INTEL_DEVID(dev_priv) == 0x1913 || \
|
|
|
|
INTEL_DEVID(dev_priv) == 0x1916 || \
|
|
|
|
INTEL_DEVID(dev_priv) == 0x1921 || \
|
|
|
|
INTEL_DEVID(dev_priv) == 0x1926)
|
|
|
|
#define IS_SKL_ULX(dev_priv) (INTEL_DEVID(dev_priv) == 0x190E || \
|
|
|
|
INTEL_DEVID(dev_priv) == 0x1915 || \
|
|
|
|
INTEL_DEVID(dev_priv) == 0x191E)
|
|
|
|
#define IS_KBL_ULT(dev_priv) (INTEL_DEVID(dev_priv) == 0x5906 || \
|
|
|
|
INTEL_DEVID(dev_priv) == 0x5913 || \
|
|
|
|
INTEL_DEVID(dev_priv) == 0x5916 || \
|
|
|
|
INTEL_DEVID(dev_priv) == 0x5921 || \
|
|
|
|
INTEL_DEVID(dev_priv) == 0x5926)
|
|
|
|
#define IS_KBL_ULX(dev_priv) (INTEL_DEVID(dev_priv) == 0x590E || \
|
|
|
|
INTEL_DEVID(dev_priv) == 0x5915 || \
|
|
|
|
INTEL_DEVID(dev_priv) == 0x591E)
|
2017-06-13 18:23:03 +07:00
|
|
|
#define IS_SKL_GT2(dev_priv) (IS_SKYLAKE(dev_priv) && \
|
2017-08-30 23:12:07 +07:00
|
|
|
(dev_priv)->info.gt == 2)
|
2016-10-13 17:02:58 +07:00
|
|
|
#define IS_SKL_GT3(dev_priv) (IS_SKYLAKE(dev_priv) && \
|
2017-08-30 23:12:07 +07:00
|
|
|
(dev_priv)->info.gt == 3)
|
2016-10-13 17:02:58 +07:00
|
|
|
#define IS_SKL_GT4(dev_priv) (IS_SKYLAKE(dev_priv) && \
|
2017-08-30 23:12:07 +07:00
|
|
|
(dev_priv)->info.gt == 4)
|
2017-06-13 18:23:07 +07:00
|
|
|
#define IS_KBL_GT2(dev_priv) (IS_KABYLAKE(dev_priv) && \
|
2017-08-30 23:12:07 +07:00
|
|
|
(dev_priv)->info.gt == 2)
|
2017-06-13 18:23:07 +07:00
|
|
|
#define IS_KBL_GT3(dev_priv) (IS_KABYLAKE(dev_priv) && \
|
2017-08-30 23:12:07 +07:00
|
|
|
(dev_priv)->info.gt == 3)
|
2017-06-10 05:02:50 +07:00
|
|
|
#define IS_CFL_ULT(dev_priv) (IS_COFFEELAKE(dev_priv) && \
|
|
|
|
(INTEL_DEVID(dev_priv) & 0x00F0) == 0x00A0)
|
2017-09-18 18:21:24 +07:00
|
|
|
#define IS_CFL_GT2(dev_priv) (IS_COFFEELAKE(dev_priv) && \
|
|
|
|
(dev_priv)->info.gt == 2)
|
2017-11-11 02:08:40 +07:00
|
|
|
#define IS_CFL_GT3(dev_priv) (IS_COFFEELAKE(dev_priv) && \
|
|
|
|
(dev_priv)->info.gt == 3)
|
2018-01-30 06:22:14 +07:00
|
|
|
#define IS_CNL_WITH_PORT_F(dev_priv) (IS_CANNONLAKE(dev_priv) && \
|
|
|
|
(INTEL_DEVID(dev_priv) & 0x0004) == 0x0004)
|
2015-09-12 11:47:50 +07:00
|
|
|
|
2016-10-31 17:18:28 +07:00
|
|
|
#define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support)
|
2010-11-09 16:17:32 +07:00
|
|
|
|
2015-10-20 19:22:00 +07:00
|
|
|
#define SKL_REVID_A0 0x0
|
|
|
|
#define SKL_REVID_B0 0x1
|
|
|
|
#define SKL_REVID_C0 0x2
|
|
|
|
#define SKL_REVID_D0 0x3
|
|
|
|
#define SKL_REVID_E0 0x4
|
|
|
|
#define SKL_REVID_F0 0x5
|
2016-07-20 18:26:12 +07:00
|
|
|
#define SKL_REVID_G0 0x6
|
|
|
|
#define SKL_REVID_H0 0x7
|
2015-10-20 19:22:00 +07:00
|
|
|
|
2015-10-20 19:22:02 +07:00
|
|
|
#define IS_SKL_REVID(p, since, until) (IS_SKYLAKE(p) && IS_REVID(p, since, until))
|
|
|
|
|
2015-10-20 19:22:00 +07:00
|
|
|
#define BXT_REVID_A0 0x0
|
2015-10-20 19:22:01 +07:00
|
|
|
#define BXT_REVID_A1 0x1
|
2015-10-20 19:22:00 +07:00
|
|
|
#define BXT_REVID_B0 0x3
|
2016-11-24 20:23:27 +07:00
|
|
|
#define BXT_REVID_B_LAST 0x8
|
2015-10-20 19:22:00 +07:00
|
|
|
#define BXT_REVID_C0 0x9
|
2015-03-20 16:03:52 +07:00
|
|
|
|
2016-10-13 17:03:04 +07:00
|
|
|
#define IS_BXT_REVID(dev_priv, since, until) \
|
|
|
|
(IS_BROXTON(dev_priv) && IS_REVID(dev_priv, since, until))
|
2015-10-20 19:22:02 +07:00
|
|
|
|
2016-06-07 21:18:55 +07:00
|
|
|
#define KBL_REVID_A0 0x0
|
|
|
|
#define KBL_REVID_B0 0x1
|
2016-06-07 21:19:03 +07:00
|
|
|
#define KBL_REVID_C0 0x2
|
|
|
|
#define KBL_REVID_D0 0x3
|
|
|
|
#define KBL_REVID_E0 0x4
|
2016-06-07 21:18:55 +07:00
|
|
|
|
2016-10-13 17:03:02 +07:00
|
|
|
#define IS_KBL_REVID(dev_priv, since, until) \
|
|
|
|
(IS_KABYLAKE(dev_priv) && IS_REVID(dev_priv, since, until))
|
2016-06-07 21:18:55 +07:00
|
|
|
|
2017-02-22 13:34:29 +07:00
|
|
|
#define GLK_REVID_A0 0x0
|
|
|
|
#define GLK_REVID_A1 0x1
|
|
|
|
|
|
|
|
#define IS_GLK_REVID(dev_priv, since, until) \
|
|
|
|
(IS_GEMINILAKE(dev_priv) && IS_REVID(dev_priv, since, until))
|
|
|
|
|
2017-06-07 03:30:34 +07:00
|
|
|
#define CNL_REVID_A0 0x0
|
|
|
|
#define CNL_REVID_B0 0x1
|
2017-08-23 06:58:28 +07:00
|
|
|
#define CNL_REVID_C0 0x2
|
2017-06-07 03:30:34 +07:00
|
|
|
|
|
|
|
#define IS_CNL_REVID(p, since, until) \
|
|
|
|
(IS_CANNONLAKE(p) && IS_REVID(p, since, until))
|
|
|
|
|
2018-05-09 04:29:23 +07:00
|
|
|
#define ICL_REVID_A0 0x0
|
|
|
|
#define ICL_REVID_A2 0x1
|
|
|
|
#define ICL_REVID_B0 0x3
|
|
|
|
#define ICL_REVID_B2 0x4
|
|
|
|
#define ICL_REVID_C0 0x5
|
|
|
|
|
|
|
|
#define IS_ICL_REVID(p, since, until) \
|
|
|
|
(IS_ICELAKE(p) && IS_REVID(p, since, until))
|
|
|
|
|
2011-04-07 02:11:14 +07:00
|
|
|
/*
|
|
|
|
* The genX designation typically refers to the render engine, so render
|
|
|
|
* capability related checks should use IS_GEN, while display and other checks
|
|
|
|
* have their own (e.g. HAS_PCH_SPLIT for ILK+ display, IS_foo for particular
|
|
|
|
* chips, etc.).
|
|
|
|
*/
|
2016-10-13 17:03:10 +07:00
|
|
|
#define IS_GEN2(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(1)))
|
|
|
|
#define IS_GEN3(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(2)))
|
|
|
|
#define IS_GEN4(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(3)))
|
|
|
|
#define IS_GEN5(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(4)))
|
|
|
|
#define IS_GEN6(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(5)))
|
|
|
|
#define IS_GEN7(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(6)))
|
|
|
|
#define IS_GEN8(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(7)))
|
|
|
|
#define IS_GEN9(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(8)))
|
2017-06-07 03:30:30 +07:00
|
|
|
#define IS_GEN10(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(9)))
|
2018-01-12 01:00:04 +07:00
|
|
|
#define IS_GEN11(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(10)))
|
2010-11-09 16:17:32 +07:00
|
|
|
|
2016-12-19 04:36:26 +07:00
|
|
|
#define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp)
|
2017-01-24 01:32:37 +07:00
|
|
|
#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && IS_LP(dev_priv))
|
|
|
|
#define IS_GEN9_BC(dev_priv) (IS_GEN9(dev_priv) && !IS_LP(dev_priv))
|
2016-11-10 22:23:09 +07:00
|
|
|
|
2016-06-23 20:52:41 +07:00
|
|
|
#define ENGINE_MASK(id) BIT(id)
|
|
|
|
#define RENDER_RING ENGINE_MASK(RCS)
|
|
|
|
#define BSD_RING ENGINE_MASK(VCS)
|
|
|
|
#define BLT_RING ENGINE_MASK(BCS)
|
|
|
|
#define VEBOX_RING ENGINE_MASK(VECS)
|
|
|
|
#define BSD2_RING ENGINE_MASK(VCS2)
|
2018-02-28 17:11:52 +07:00
|
|
|
#define BSD3_RING ENGINE_MASK(VCS3)
|
|
|
|
#define BSD4_RING ENGINE_MASK(VCS4)
|
|
|
|
#define VEBOX2_RING ENGINE_MASK(VECS2)
|
2016-06-23 20:52:41 +07:00
|
|
|
#define ALL_ENGINES (~0)
|
|
|
|
|
|
|
|
#define HAS_ENGINE(dev_priv, id) \
|
2016-11-04 21:42:44 +07:00
|
|
|
(!!((dev_priv)->info.ring_mask & ENGINE_MASK(id)))
|
2016-06-23 20:52:41 +07:00
|
|
|
|
|
|
|
#define HAS_BSD(dev_priv) HAS_ENGINE(dev_priv, VCS)
|
|
|
|
#define HAS_BSD2(dev_priv) HAS_ENGINE(dev_priv, VCS2)
|
|
|
|
#define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS)
|
|
|
|
#define HAS_VEBOX(dev_priv) HAS_ENGINE(dev_priv, VECS)
|
|
|
|
|
2017-11-21 03:55:04 +07:00
|
|
|
#define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN7(dev_priv)
|
|
|
|
|
2016-11-04 21:42:44 +07:00
|
|
|
#define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc)
|
|
|
|
#define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop)
|
|
|
|
#define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED))
|
2016-10-13 17:03:00 +07:00
|
|
|
#define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \
|
|
|
|
IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
|
2010-11-09 16:17:32 +07:00
|
|
|
|
2016-11-04 21:42:44 +07:00
|
|
|
#define HWS_NEEDS_PHYSICAL(dev_priv) ((dev_priv)->info.hws_needs_physical)
|
2012-02-09 23:15:46 +07:00
|
|
|
|
2016-11-04 21:42:44 +07:00
|
|
|
#define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \
|
|
|
|
((dev_priv)->info.has_logical_ring_contexts)
|
2018-03-02 23:14:59 +07:00
|
|
|
#define HAS_LOGICAL_RING_ELSQ(dev_priv) \
|
|
|
|
((dev_priv)->info.has_logical_ring_elsq)
|
2017-10-26 03:00:18 +07:00
|
|
|
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
|
|
|
|
((dev_priv)->info.has_logical_ring_preemption)
|
2017-11-21 03:55:00 +07:00
|
|
|
|
|
|
|
#define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
|
|
|
|
|
2017-09-20 02:38:44 +07:00
|
|
|
#define USES_PPGTT(dev_priv) (i915_modparams.enable_ppgtt)
|
|
|
|
#define USES_FULL_PPGTT(dev_priv) (i915_modparams.enable_ppgtt >= 2)
|
|
|
|
#define USES_FULL_48BIT_PPGTT(dev_priv) (i915_modparams.enable_ppgtt == 3)
|
2017-10-07 05:18:18 +07:00
|
|
|
#define HAS_PAGE_SIZES(dev_priv, sizes) ({ \
|
|
|
|
GEM_BUG_ON((sizes) == 0); \
|
|
|
|
((sizes) & ~(dev_priv)->info.page_sizes) == 0; \
|
|
|
|
})
|
2016-11-04 21:42:44 +07:00
|
|
|
|
|
|
|
#define HAS_OVERLAY(dev_priv) ((dev_priv)->info.has_overlay)
|
|
|
|
#define OVERLAY_NEEDS_PHYSICAL(dev_priv) \
|
|
|
|
((dev_priv)->info.overlay_needs_physical)
|
2010-11-09 16:17:32 +07:00
|
|
|
|
2012-12-17 22:21:27 +07:00
|
|
|
/* Early gen2 have a totally busted CS tlb and require pinned batches. */
|
2016-11-30 22:43:04 +07:00
|
|
|
#define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv))
|
2015-12-17 00:18:37 +07:00
|
|
|
|
2018-02-23 03:05:35 +07:00
|
|
|
/* WaRsDisableCoarsePowerGating:skl,cnl */
|
2016-06-21 21:07:14 +07:00
|
|
|
#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \
|
2018-02-23 03:05:35 +07:00
|
|
|
(IS_CANNONLAKE(dev_priv) || \
|
|
|
|
IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv))
|
2016-04-05 19:56:16 +07:00
|
|
|
|
2017-08-19 01:37:05 +07:00
|
|
|
#define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4)
|
2018-06-28 20:34:49 +07:00
|
|
|
#define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \
|
|
|
|
IS_GEMINILAKE(dev_priv) || \
|
|
|
|
IS_KABYLAKE(dev_priv))
|
2012-12-17 22:21:27 +07:00
|
|
|
|
2010-11-09 16:17:32 +07:00
|
|
|
/* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
|
|
|
|
* rows, which changed the alignment requirements and fence programming.
|
|
|
|
*/
|
2016-10-13 17:02:58 +07:00
|
|
|
#define HAS_128_BYTE_Y_TILING(dev_priv) (!IS_GEN2(dev_priv) && \
|
|
|
|
!(IS_I915G(dev_priv) || \
|
|
|
|
IS_I915GM(dev_priv)))
|
2016-11-07 16:29:20 +07:00
|
|
|
/*
 * Display and power-management feature tests.  Most read a flag from
 * (dev_priv)->info; the others are derived from generation/platform checks.
 */
#define SUPPORTS_TV(dev_priv)		((dev_priv)->info.supports_tv)
#define I915_HAS_HOTPLUG(dev_priv)	((dev_priv)->info.has_hotplug)

#define HAS_FW_BLC(dev_priv)	(INTEL_GEN(dev_priv) > 2)
#define HAS_FBC(dev_priv)	((dev_priv)->info.has_fbc)
#define HAS_CUR_FBC(dev_priv) \
	(!HAS_GMCH_DISPLAY(dev_priv) && INTEL_GEN(dev_priv) >= 7)

#define HAS_IPS(dev_priv)	(IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv))

#define HAS_DP_MST(dev_priv)	((dev_priv)->info.has_dp_mst)

#define HAS_DDI(dev_priv)		 ((dev_priv)->info.has_ddi)
#define HAS_FPGA_DBG_UNCLAIMED(dev_priv) ((dev_priv)->info.has_fpga_dbg)
#define HAS_PSR(dev_priv)		 ((dev_priv)->info.has_psr)

#define HAS_RC6(dev_priv)		 ((dev_priv)->info.has_rc6)
#define HAS_RC6p(dev_priv)		 ((dev_priv)->info.has_rc6p)
#define HAS_RC6pp(dev_priv)		 (false) /* HW was never validated */

#define HAS_CSR(dev_priv)	((dev_priv)->info.has_csr)
|
drm/i915/skl: Add support to load SKL CSR firmware.
Display Context Save and Restore support is needed for
various SKL Display C states like DC5, DC6.
This implementation is added based on first version of DMC CSR program
that we received from h/w team.
Here we are using request_firmware based design.
Finally this firmware should end up in linux-firmware tree.
For SKL platform its mandatory to ensure that we load this
csr program before enabling DC states like DC5/DC6.
As CSR program gets reset on various conditions, we should ensure
to load it during boot and in future change to be added to load
this system resume sequence too.
v1: Initial release as RFC patch
v2: Design change as per Daniel, Damien and Shobit's review comments
request firmware method followed.
v3: Some optimization and functional changes.
Pulled register defines into drivers/gpu/drm/i915/i915_reg.h
Used kmemdup to allocate and duplicate firmware content.
Ensured to free allocated buffer.
v4: Modified as per review comments from Satheesh and Daniel
Removed temporary buffer.
Optimized number of writes by replacing I915_WRITE with I915_WRITE64.
v5:
Modified as per review comments from Damien.
- Changed name for functions and firmware.
- Introduced HAS_CSR.
- Reverted back previous change and used csr_buf with u8 size.
- Using cpu_to_be64 for endianness change.
Modified as per review comments from Imre.
- Modified registers and macro names to be a bit closer to bspec terminology
and the existing register naming in the driver.
- Early return for non SKL platforms in intel_load_csr_program function.
- Added locking around CSR program load function as it may be called
concurrently during system/runtime resume.
- Releasing the fw before loading the program for consistency
- Handled error path during f/w load.
v6: Modified as per review comments from Imre.
- Corrected out_freecsr sequence.
v7: Modified as per review comments from Imre.
Fail loading fw if fw->size%8!=0.
v8: Rebase to latest.
v9: Rebase on top of -nightly (Damien)
v10: Enabled support for dmc firmware ver 1.0.
According to ver 1.0 in a single binary package all the firmware's that are
required for different stepping's of the product will be stored. The package
contains the css header, followed by the package header and the actual dmc
firmwares. Package header contains the firmware/stepping mapping table and
the corresponding firmware offsets to the individual binaries, within the
package. Each individual program binary contains the header and the payload
sections whose size is specified in the header section. This changes are done
to extract the specific firmware from the package. (Animesh)
v11: Modified as per review comments from Imre.
- Added code comment from bspec for header structure elements.
- Added __packed to avoid structure padding.
- Added helper functions for stepping and substepping info.
- Added code comment for CSR_MAX_FW_SIZE.
- Disabled BXT firmware loading, will be enabled with dmc 1.0 support.
- Changed skl_stepping_info based on bspec, earlier used from config DB.
- Removed duplicate call of cpu_to_be* from intel_csr_load_program function.
- Used cpu_to_be32 instead of cpu_to_be64 as firmware binary in dword aligned.
- Added sanity check for header length.
- Added sanity check for mmio address got from firmware binary.
- kmalloc done separately for dmc header and dmc firmware. (Animesh)
v12: Modified as per review comments from Imre.
- Corrected the typo error in skl stepping info structure.
- Added out-of-bound access for skl_stepping_info.
- Sanity check for mmio address modified.
- Sanity check added for stepping and substepping.
- Modified the intel_dmc_info structure, cache only the required header info. (Animesh)
v13: clarify firmware load error message.
The reason for a firmware loading failure can be obscure if the driver
is built-in. Provide an explanation to the user about the likely reason for
the failure and how to resolve it. (Imre)
v14: Suggested by Jani.
- fix s/I915/CONFIG_DRM_I915/ typo
- add fw_path to the firmware object instead of using a static ptr (Jani)
v15:
1) Changed the firmware name as dmc_gen9.bin, everytime for a new firmware version a symbolic link
with same name will help not to build kernel again.
2) Changes done as per review comments from Imre.
- Error check removed for intel_csr_ucode_init.
- Moved csr-specific data structure to intel_csr.h and optimization done on structure definition.
- fw->data used directly for parsing the header info & memory allocation
only done separately for payload. (Animesh)
v16:
- No need for out_regs label in i915_driver_load(), so removed it.
- Changed the firmware name as skl_dmc_ver1.bin, followed naming convention <platform>_dmc_<api-version>.bin (Animesh)
Issue: VIZ-2569
Signed-off-by: A.Sunil Kamath <sunil.kamath@intel.com>
Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Animesh Manna <animesh.manna@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-05-04 19:58:44 +07:00
|
|
|
|
2016-10-13 17:02:55 +07:00
|
|
|
/* Feature flags read directly from (dev_priv)->info. */
#define HAS_RUNTIME_PM(dev_priv)	((dev_priv)->info.has_runtime_pm)
#define HAS_64BIT_RELOC(dev_priv)	((dev_priv)->info.has_64bit_reloc)

#define HAS_IPC(dev_priv)		((dev_priv)->info.has_ipc)
|
|
|
|
|
2016-05-13 21:36:30 +07:00
|
|
|
/*
 * For now, anything with a GuC requires uCode loading, and then supports
 * command submission once loaded. But these are logically independent
 * properties, so we have separate macros to test them.
 */
#define HAS_GUC(dev_priv)	((dev_priv)->info.has_guc)
#define HAS_GUC_CT(dev_priv)	((dev_priv)->info.has_guc_ct)
#define HAS_GUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
#define HAS_GUC_SCHED(dev_priv)	(HAS_GUC(dev_priv))

/* For now, anything with a GuC has also HuC */
#define HAS_HUC(dev_priv)	(HAS_GUC(dev_priv))
#define HAS_HUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))

/*
 * Having a GuC is not the same as using a GuC.  The USES_* tests expand to
 * the intel_uc_is_using_*() helpers and do not use their dev_priv argument.
 */
#define USES_GUC(dev_priv)		intel_uc_is_using_guc()
#define USES_GUC_SUBMISSION(dev_priv)	intel_uc_is_using_guc_submission()
#define USES_HUC(dev_priv)		intel_uc_is_using_huc()

#define HAS_POOLED_EU(dev_priv)	((dev_priv)->info.has_pooled_eu)
|
2016-06-03 12:34:33 +07:00
|
|
|
|
2017-06-22 00:49:44 +07:00
|
|
|
/*
 * PCH device id mask and per-PCH id "type" values.
 * NOTE(review): presumably a PCH PCI device id is ANDed with
 * INTEL_PCH_DEVICE_ID_MASK before being compared against the *_TYPE values
 * (e.g. qemu's 0x2918 masks to 0x2900) - the users are outside this header.
 */
#define INTEL_PCH_DEVICE_ID_MASK		0xff80
#define INTEL_PCH_IBX_DEVICE_ID_TYPE		0x3b00
#define INTEL_PCH_CPT_DEVICE_ID_TYPE		0x1c00
#define INTEL_PCH_PPT_DEVICE_ID_TYPE		0x1e00
#define INTEL_PCH_LPT_DEVICE_ID_TYPE		0x8c00
#define INTEL_PCH_LPT_LP_DEVICE_ID_TYPE		0x9c00
#define INTEL_PCH_WPT_DEVICE_ID_TYPE		0x8c80
#define INTEL_PCH_WPT_LP_DEVICE_ID_TYPE		0x9c80
#define INTEL_PCH_SPT_DEVICE_ID_TYPE		0xA100
#define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE		0x9D00
#define INTEL_PCH_KBP_DEVICE_ID_TYPE		0xA280
#define INTEL_PCH_CNP_DEVICE_ID_TYPE		0xA300
#define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE		0x9D80
#define INTEL_PCH_ICP_DEVICE_ID_TYPE		0x3480
#define INTEL_PCH_P2X_DEVICE_ID_TYPE		0x7100
#define INTEL_PCH_P3X_DEVICE_ID_TYPE		0x7000
#define INTEL_PCH_QEMU_DEVICE_ID_TYPE		0x2900 /* qemu q35 has 2918 */
|
2012-11-21 00:12:07 +07:00
|
|
|
|
2016-10-13 17:02:53 +07:00
|
|
|
/*
 * PCH identification.  INTEL_PCH_TYPE()/INTEL_PCH_ID() read the cached
 * pch_type and pch_id fields; the HAS_PCH_*() tests compare against them.
 * The _LP/_H variants compare the id rather than the type.
 */
#define INTEL_PCH_TYPE(dev_priv)	((dev_priv)->pch_type)
#define INTEL_PCH_ID(dev_priv)		((dev_priv)->pch_id)
#define HAS_PCH_ICP(dev_priv)	(INTEL_PCH_TYPE(dev_priv) == PCH_ICP)
#define HAS_PCH_CNP(dev_priv)	(INTEL_PCH_TYPE(dev_priv) == PCH_CNP)
#define HAS_PCH_CNP_LP(dev_priv) \
	(INTEL_PCH_ID(dev_priv) == INTEL_PCH_CNP_LP_DEVICE_ID_TYPE)
#define HAS_PCH_KBP(dev_priv)	(INTEL_PCH_TYPE(dev_priv) == PCH_KBP)
#define HAS_PCH_SPT(dev_priv)	(INTEL_PCH_TYPE(dev_priv) == PCH_SPT)
#define HAS_PCH_LPT(dev_priv)	(INTEL_PCH_TYPE(dev_priv) == PCH_LPT)
#define HAS_PCH_LPT_LP(dev_priv) \
	(INTEL_PCH_ID(dev_priv) == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE || \
	 INTEL_PCH_ID(dev_priv) == INTEL_PCH_WPT_LP_DEVICE_ID_TYPE)
#define HAS_PCH_LPT_H(dev_priv) \
	(INTEL_PCH_ID(dev_priv) == INTEL_PCH_LPT_DEVICE_ID_TYPE || \
	 INTEL_PCH_ID(dev_priv) == INTEL_PCH_WPT_DEVICE_ID_TYPE)
#define HAS_PCH_CPT(dev_priv)	(INTEL_PCH_TYPE(dev_priv) == PCH_CPT)
#define HAS_PCH_IBX(dev_priv)	(INTEL_PCH_TYPE(dev_priv) == PCH_IBX)
#define HAS_PCH_NOP(dev_priv)	(INTEL_PCH_TYPE(dev_priv) == PCH_NOP)
#define HAS_PCH_SPLIT(dev_priv)	(INTEL_PCH_TYPE(dev_priv) != PCH_NONE)
|
2010-11-09 16:17:32 +07:00
|
|
|
|
2016-10-13 17:02:54 +07:00
|
|
|
#define HAS_GMCH_DISPLAY(dev_priv) ((dev_priv)->info.has_gmch_display)

#define HAS_LSPCON(dev_priv) (INTEL_GEN(dev_priv) >= 9)

/* DPF == dynamic parity feature */
#define HAS_L3_DPF(dev_priv) ((dev_priv)->info.has_l3_dpf)
/* 2 on HSW GT3; otherwise the has_l3_dpf flag value is used as the count. */
#define NUM_L3_SLICES(dev_priv) (IS_HSW_GT3(dev_priv) ? \
				 2 : HAS_L3_DPF(dev_priv))

#define GT_FREQUENCY_MULTIPLIER 50
#define GEN9_FREQ_SCALER 3
|
2012-09-08 09:43:39 +07:00
|
|
|
|
2010-11-09 02:18:58 +07:00
|
|
|
#include "i915_trace.h"
|
|
|
|
|
2017-05-25 19:16:12 +07:00
|
|
|
/*
 * Report whether intel_iommu_gfx_mapped is set.  Always false when the
 * kernel is built without CONFIG_INTEL_IOMMU.
 */
static inline bool intel_vtd_active(void)
{
#ifdef CONFIG_INTEL_IOMMU
	if (intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}
|
|
|
|
|
2017-05-25 19:16:12 +07:00
|
|
|
/* True on gen6+ when intel_vtd_active() reports an active VT-d mapping. */
static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
{
	return INTEL_GEN(dev_priv) >= 6 && intel_vtd_active();
}
|
|
|
|
|
2017-05-24 22:54:11 +07:00
|
|
|
/* True only on Broxton when intel_vtd_active() reports an active mapping. */
static inline bool
intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv)
{
	return IS_BROXTON(dev_priv) && intel_vtd_active();
}
|
|
|
|
|
2016-05-06 21:40:21 +07:00
|
|
|
/*
 * NOTE(review): presumably maps a requested enable_ppgtt value to the one
 * actually usable on this device - confirm against the definition.
 */
int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
				int enable_ppgtt);
|
2016-04-29 19:18:22 +07:00
|
|
|
|
2016-06-24 20:00:22 +07:00
|
|
|
/* i915_drv.c */
|
2016-03-18 15:46:10 +07:00
|
|
|
void __printf(3, 4)
|
|
|
|
__i915_printk(struct drm_i915_private *dev_priv, const char *level,
|
|
|
|
const char *fmt, ...);
|
|
|
|
|
|
|
|
#define i915_report_error(dev_priv, fmt, ...) \
|
|
|
|
__i915_printk(dev_priv, KERN_ERR, fmt, ##__VA_ARGS__)
|
|
|
|
|
2012-04-17 04:07:40 +07:00
|
|
|
/*
 * Compat ioctl entry point; when CONFIG_COMPAT is off the name expands to
 * NULL so it can still be used where a function pointer is expected.
 */
#ifdef CONFIG_COMPAT
long i915_compat_ioctl(struct file *filp, unsigned int cmd,
		       unsigned long arg);
#else
#define i915_compat_ioctl NULL
#endif
|
2016-09-15 20:28:54 +07:00
|
|
|
extern const struct dev_pm_ops i915_pm_ops;
|
|
|
|
|
|
|
|
extern int i915_driver_load(struct pci_dev *pdev,
|
|
|
|
const struct pci_device_id *ent);
|
|
|
|
extern void i915_driver_unload(struct drm_device *dev);
|
2016-05-10 20:10:04 +07:00
|
|
|
extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
|
|
|
|
extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
|
2017-07-21 19:32:37 +07:00
|
|
|
|
2018-04-07 05:03:54 +07:00
|
|
|
extern void i915_reset(struct drm_i915_private *i915,
|
|
|
|
unsigned int stalled_mask,
|
|
|
|
const char *reason);
|
|
|
|
extern int i915_reset_engine(struct intel_engine_cs *engine,
|
|
|
|
const char *reason);
|
2017-07-21 19:32:37 +07:00
|
|
|
|
drm/i915: Modify error handler for per engine hang recovery
This is a preparatory patch which modifies error handler to do per engine
hang recovery. The actual patch which implements this sequence follows
later in the series. The aim is to prepare existing recovery function to
adapt to this new function where applicable (which fails at this point
because core implementation is lacking) and continue recovery using legacy
full gpu reset.
A helper function is also added to query the availability of engine
reset. A subsequent patch will add the capability to query which type
of reset is present (engine -> full -> no-reset) via the get-param
ioctl.
It has been decided that the error events that are used to notify user of
reset will only be sent in case of a full chip reset. In case of just
single (or multiple) engine resets, userspace won't be notified by these
events.
Note that this implementation of engine reset is for i915 directly
submitting to the ELSP, where the driver manages the hang detection,
recovery and resubmission. With GuC submission these tasks are shared
between driver and firmware; i915 will still be responsible for detecting a
hang, and when it does it will have to request GuC to reset that Engine and
remind the firmware about the outstanding submissions. This will be
added in different patch.
v2: rebase, advertise engine reset availability in platform definition,
add note about GuC submission.
v3: s/*engine_reset*/*reset_engine*/. (Chris)
Handle reset as 2 level resets, by first going to engine only and fall
backing to full/chip reset as needed, i.e. reset_engine will need the
struct_mutex.
v4: Pass the engine mask to i915_reset. (Chris)
v5: Rebase, update selftests.
v6: Rebase, prepare for mutex-less reset engine.
v7: Pass reset_engine mask as a function parameter, and iterate over the
engine mask for reset_engine. (Chris)
v8: Use i915.reset >=2 in has_reset_engine; remove redundant reset
logging; add a reset-engine-in-progress flag to prevent concurrent
resets, and avoid dual purposing of reset-backoff. (Chris)
v9: Support reset of different engines in parallel (Chris)
v10: Handle reset-engine flag locking better (Chris)
v11: Squash in reporting of per-engine-reset availability.
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ian Lister <ian.lister@intel.com>
Signed-off-by: Tomas Elf <tomas.elf@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170615201828.23144-4-michel.thierry@intel.com
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170620095751.13127-5-chris@chris-wilson.co.uk
2017-06-20 16:57:46 +07:00
|
|
|
extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
|
2017-10-31 01:56:14 +07:00
|
|
|
extern int intel_reset_guc(struct drm_i915_private *dev_priv);
|
2017-11-01 05:53:09 +07:00
|
|
|
extern int intel_guc_reset_engine(struct intel_guc *guc,
|
|
|
|
struct intel_engine_cs *engine);
|
2016-03-21 23:26:59 +07:00
|
|
|
extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
|
2016-11-01 23:43:03 +07:00
|
|
|
extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
|
2010-05-21 04:28:11 +07:00
|
|
|
extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
|
|
|
|
extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
|
|
|
|
extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
|
|
|
|
extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
|
2014-04-18 20:35:02 +07:00
|
|
|
int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
|
2010-05-21 04:28:11 +07:00
|
|
|
|
2017-04-28 14:53:36 +07:00
|
|
|
int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
|
2017-01-24 18:01:34 +07:00
|
|
|
int intel_engines_init(struct drm_i915_private *dev_priv);
|
|
|
|
|
2018-05-19 05:39:57 +07:00
|
|
|
u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
|
|
|
|
|
2015-06-18 17:06:16 +07:00
|
|
|
/* intel_hotplug.c */
|
2016-05-06 20:48:28 +07:00
|
|
|
void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
|
|
|
|
u32 pin_mask, u32 long_mask);
|
2015-06-18 17:06:16 +07:00
|
|
|
void intel_hpd_init(struct drm_i915_private *dev_priv);
|
|
|
|
void intel_hpd_init_work(struct drm_i915_private *dev_priv);
|
|
|
|
void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
|
2018-01-30 06:22:21 +07:00
|
|
|
enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv,
|
|
|
|
enum port port);
|
2016-06-22 04:03:43 +07:00
|
|
|
bool intel_hpd_disable(struct drm_i915_private *dev_priv, enum hpd_pin pin);
|
|
|
|
void intel_hpd_enable(struct drm_i915_private *dev_priv, enum hpd_pin pin);
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* i915_irq.c */
|
2016-07-01 23:23:13 +07:00
|
|
|
static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
|
|
|
unsigned long delay;
|
|
|
|
|
2017-09-20 02:38:44 +07:00
|
|
|
if (unlikely(!i915_modparams.enable_hangcheck))
|
2016-07-01 23:23:13 +07:00
|
|
|
return;
|
|
|
|
|
|
|
|
/* Don't continually defer the hangcheck so that it is always run at
|
|
|
|
* least once after work has been scheduled on any ring. Otherwise,
|
|
|
|
* we will ignore a hung ring if a second ring is kept busy.
|
|
|
|
*/
|
|
|
|
|
|
|
|
delay = round_jiffies_up_relative(DRM_I915_HANGCHECK_JIFFIES);
|
|
|
|
queue_delayed_work(system_long_wq,
|
|
|
|
&dev_priv->gpu_error.hangcheck_work, delay);
|
|
|
|
}
|
|
|
|
|
2018-03-20 17:04:49 +07:00
|
|
|
/* printf-style: @fmt is argument 4, its varargs start at argument 5. */
__printf(4, 5)
void i915_handle_error(struct drm_i915_private *dev_priv,
		       u32 engine_mask,
		       unsigned long flags,
		       const char *fmt, ...);
/* NOTE(review): presumably a bit for @flags above - confirm with callers. */
#define I915_ERROR_CAPTURE BIT(0)
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2014-09-30 15:56:44 +07:00
|
|
|
/* Interrupt setup/teardown (declarations only). */
void intel_irq_init(struct drm_i915_private *dev_priv);
void intel_irq_fini(struct drm_i915_private *dev_priv);
int intel_irq_install(struct drm_i915_private *dev_priv);
void intel_irq_uninstall(struct drm_i915_private *dev_priv);

void i915_clear_error_registers(struct drm_i915_private *dev_priv);
|
|
|
|
|
drm/i915: gvt: Introduce the basic architecture of GVT-g
This patch introduces the very basic framework of GVT-g device model,
includes basic prototypes, definitions, initialization.
v12:
- Call intel_gvt_init() in driver early initialization stage. (Chris)
v8:
- Remove the GVT idr and mutex in intel_gvt_host. (Joonas)
v7:
- Refine the URL link in Kconfig. (Joonas)
- Refine the introduction of GVT-g host support in Kconfig. (Joonas)
- Remove the macro GVT_ALIGN(), use round_down() instead. (Joonas)
- Make "struct intel_gvt" a data member in struct drm_i915_private.(Joonas)
- Remove {alloc, free}_gvt_device()
- Rename intel_gvt_{create, destroy}_gvt_device()
- Expose intel_gvt_init_host()
- Remove the dummy "struct intel_gvt" declaration in intel_gvt.h (Joonas)
v6:
- Refine introduction in Kconfig. (Chris)
- The exposed API functions will take struct intel_gvt * instead of
void *. (Chris/Tvrtko)
- Remove most members of struct intel_gvt_device_info. Will add them
in the device model patches.(Chris)
- Remove gvt_info() and gvt_err() in debug.h. (Chris)
- Move GVT kernel parameter into i915_params. (Chris)
- Remove include/drm/i915_gvt.h, as GVT-g will be built within i915.
- Remove the redundant struct i915_gvt *, as the functions in i915
will directly take struct intel_gvt *.
- Add more comments for reviewer.
v5:
Take Tvrtko's comments:
- Fix the misspelled words in Kconfig
- Let functions take drm_i915_private * instead of struct drm_device *
- Remove redundant prints/local variable initialization
v3:
Take Joonas' comments:
- Change file name i915_gvt.* to intel_gvt.*
- Move GVT kernel parameter into intel_gvt.c
- Remove redundant debug macros
- Change error handling style
- Add introductions for some stub functions
- Introduce drm/i915_gvt.h.
Take Kevin's comments:
- Move GVT-g host/guest check into intel_vgt_balloon in i915_gem_gtt.c
v2:
- Introduce i915_gvt.c.
It's necessary to introduce the stubs between i915 driver and GVT-g host,
as GVT-g components is configurable in kernel config. When disabled, the
stubs here do nothing.
Take Joonas' comments:
- Replace boolean return value with int.
- Replace customized info/warn/debug macros with DRM macros.
- Document all non-static functions like i915.
- Remove empty and unused functions.
- Replace magic numbers with macros.
- Set GVT-g in kernel config to "n" by default.
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1466078825-6662-5-git-send-email-zhi.a.wang@intel.com
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
2016-06-16 19:07:00 +07:00
|
|
|
static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
2016-10-20 16:15:03 +07:00
|
|
|
return dev_priv->gvt;
|
drm/i915: gvt: Introduce the basic architecture of GVT-g
This patch introduces the very basic framework of GVT-g device model,
includes basic prototypes, definitions, initialization.
v12:
- Call intel_gvt_init() in driver early initialization stage. (Chris)
v8:
- Remove the GVT idr and mutex in intel_gvt_host. (Joonas)
v7:
- Refine the URL link in Kconfig. (Joonas)
- Refine the introduction of GVT-g host support in Kconfig. (Joonas)
- Remove the macro GVT_ALIGN(), use round_down() instead. (Joonas)
- Make "struct intel_gvt" a data member in struct drm_i915_private.(Joonas)
- Remove {alloc, free}_gvt_device()
- Rename intel_gvt_{create, destroy}_gvt_device()
- Expost intel_gvt_init_host()
- Remove the dummy "struct intel_gvt" declaration in intel_gvt.h (Joonas)
v6:
- Refine introduction in Kconfig. (Chris)
- The exposed API functions will take struct intel_gvt * instead of
void *. (Chris/Tvrtko)
- Remove most memebers of strct intel_gvt_device_info. Will add them
in the device model patches.(Chris)
- Remove gvt_info() and gvt_err() in debug.h. (Chris)
- Move GVT kernel parameter into i915_params. (Chris)
- Remove include/drm/i915_gvt.h, as GVT-g will be built within i915.
- Remove the redundant struct i915_gvt *, as the functions in i915
will directly take struct intel_gvt *.
- Add more comments for reviewer.
v5:
Take Tvrtko's comments:
- Fix the misspelled words in Kconfig
- Let functions take drm_i915_private * instead of struct drm_device *
- Remove redundant prints/local varible initialization
v3:
Take Joonas' comments:
- Change file name i915_gvt.* to intel_gvt.*
- Move GVT kernel parameter into intel_gvt.c
- Remove redundant debug macros
- Change error handling style
- Add introductions for some stub functions
- Introduce drm/i915_gvt.h.
Take Kevin's comments:
- Move GVT-g host/guest check into intel_vgt_balloon in i915_gem_gtt.c
v2:
- Introduce i915_gvt.c.
It's necessary to introduce the stubs between i915 driver and GVT-g host,
as GVT-g components is configurable in kernel config. When disabled, the
stubs here do nothing.
Take Joonas' comments:
- Replace boolean return value with int.
- Replace customized info/warn/debug macros with DRM macros.
- Document all non-static functions like i915.
- Remove empty and unused functions.
- Replace magic number with marcos.
- Set GVT-g in kernel config to "n" by default.
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1466078825-6662-5-git-send-email-zhi.a.wang@intel.com
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
2016-06-16 19:07:00 +07:00
|
|
|
}
|
|
|
|
|
2016-05-06 21:40:21 +07:00
|
|
|
static inline bool intel_vgpu_active(struct drm_i915_private *dev_priv)
|
2015-02-10 18:05:47 +07:00
|
|
|
{
|
2016-05-06 21:40:21 +07:00
|
|
|
return dev_priv->vgpu.active;
|
2015-02-10 18:05:47 +07:00
|
|
|
}
|
2011-04-07 02:13:38 +07:00
|
|
|
|
2017-09-14 22:17:31 +07:00
|
|
|
u32 i915_pipestat_enable_mask(struct drm_i915_private *dev_priv,
|
|
|
|
enum pipe pipe);
|
2008-11-04 17:03:27 +07:00
|
|
|
void
|
2014-03-31 18:27:21 +07:00
|
|
|
i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
|
2014-02-10 23:42:47 +07:00
|
|
|
u32 status_mask);
|
2008-11-04 17:03:27 +07:00
|
|
|
|
|
|
|
void
|
2014-03-31 18:27:21 +07:00
|
|
|
i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
|
2014-02-10 23:42:47 +07:00
|
|
|
u32 status_mask);
|
2008-11-04 17:03:27 +07:00
|
|
|
|
2014-03-05 00:23:07 +07:00
|
|
|
void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv);
|
|
|
|
void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv);
|
2015-09-23 21:15:27 +07:00
|
|
|
void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv,
|
|
|
|
uint32_t mask,
|
|
|
|
uint32_t bits);
|
2015-11-23 23:06:16 +07:00
|
|
|
void ilk_update_display_irq(struct drm_i915_private *dev_priv,
			    uint32_t interrupt_mask,
			    uint32_t enabled_irq_mask);

/* Enable: @bits is passed as both the interrupt mask and the enabled mask. */
static inline void
ilk_enable_display_irq(struct drm_i915_private *dev_priv, uint32_t bits)
{
	ilk_update_display_irq(dev_priv, bits, bits);
}

/* Disable: @bits is the interrupt mask, with an empty enabled mask. */
static inline void
ilk_disable_display_irq(struct drm_i915_private *dev_priv, uint32_t bits)
{
	ilk_update_display_irq(dev_priv, bits, 0);
}
|
2015-11-23 23:06:17 +07:00
|
|
|
void bdw_update_pipe_irq(struct drm_i915_private *dev_priv,
|
|
|
|
enum pipe pipe,
|
|
|
|
uint32_t interrupt_mask,
|
|
|
|
uint32_t enabled_irq_mask);
|
|
|
|
static inline void bdw_enable_pipe_irq(struct drm_i915_private *dev_priv,
|
|
|
|
enum pipe pipe, uint32_t bits)
|
|
|
|
{
|
|
|
|
bdw_update_pipe_irq(dev_priv, pipe, bits, bits);
|
|
|
|
}
|
|
|
|
static inline void bdw_disable_pipe_irq(struct drm_i915_private *dev_priv,
|
|
|
|
enum pipe pipe, uint32_t bits)
|
|
|
|
{
|
|
|
|
bdw_update_pipe_irq(dev_priv, pipe, bits, 0);
|
|
|
|
}
|
2014-09-30 15:56:46 +07:00
|
|
|
void ibx_display_interrupt_update(struct drm_i915_private *dev_priv,
				  uint32_t interrupt_mask,
				  uint32_t enabled_irq_mask);

/* Enable: @bits is passed as both the interrupt mask and the enabled mask. */
static inline void
ibx_enable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits)
{
	ibx_display_interrupt_update(dev_priv, bits, bits);
}

/* Disable: @bits is the interrupt mask, with an empty enabled mask. */
static inline void
ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits)
{
	ibx_display_interrupt_update(dev_priv, bits, 0);
}
|
|
|
|
|
2008-07-31 02:06:12 +07:00
|
|
|
/* i915_gem.c */

/* GEM ioctl handlers: each takes the device, the ioctl payload and the file. */
int i915_gem_create_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv);
int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv);
int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv);
int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv);
int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *file_priv);
int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *file_priv);
int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *file_priv);
int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file_priv);
int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv);
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file);
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file);
int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv);
int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
			   struct drm_file *file_priv);
int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *file_priv);
int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *file_priv);

/* userptr setup/teardown */
int i915_gem_init_userptr(struct drm_i915_private *dev_priv);
void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv);
|
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
By exporting the ability to map user address and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forwarding porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifier
with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
rearrange error path to destroy the mmu_notifier locklessly.
Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
allocations of the same userptr range - and notice that
struct_mutex was presumed to be held when during creation it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the
hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required
infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to
be used to fix up the kernel thread's current->mm for use with
copy_user().
v22: Use sg_alloc_table_from_pages for that chunky feeling
v23: Export a function for sanity checking dma-buf rather than encode
userptr details elsewhere, and clean up comments based on
suggestions by Bradley.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com>
[danvet: Frob ioctl allocation to pick the next one - will cause a bit
of fuss with create2 apparently, but such are the rules.]
[danvet2: oops, forgot to git add after manual patch application]
[danvet3: Appease sparse.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-05-16 20:22:37 +07:00
|
|
|
int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file);
|
2008-10-23 11:40:13 +07:00
|
|
|
int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
2012-05-25 05:03:10 +07:00
|
|
|
int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file_priv);
|
2017-01-24 18:01:35 +07:00
|
|
|
void i915_gem_sanitize(struct drm_i915_private *i915);
|
2018-03-23 19:34:49 +07:00
|
|
|
int i915_gem_init_early(struct drm_i915_private *dev_priv);
|
|
|
|
void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
|
2016-03-16 19:54:03 +07:00
|
|
|
void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
|
2016-09-21 20:51:07 +07:00
|
|
|
int i915_gem_freeze(struct drm_i915_private *dev_priv);
|
2016-05-14 13:26:33 +07:00
|
|
|
int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
|
|
|
|
|
2016-12-01 21:16:36 +07:00
|
|
|
void *i915_gem_object_alloc(struct drm_i915_private *dev_priv);
|
2012-11-15 18:32:30 +07:00
|
|
|
void i915_gem_object_free(struct drm_i915_gem_object *obj);
|
2012-06-07 21:38:42 +07:00
|
|
|
void i915_gem_object_init(struct drm_i915_gem_object *obj,
|
|
|
|
const struct drm_i915_gem_object_ops *ops);
|
2016-12-01 21:16:37 +07:00
|
|
|
struct drm_i915_gem_object *
|
|
|
|
i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size);
|
|
|
|
struct drm_i915_gem_object *
|
|
|
|
i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
|
|
|
|
const void *data, size_t size);
|
2016-08-04 13:52:45 +07:00
|
|
|
void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
|
2008-07-31 02:06:12 +07:00
|
|
|
void i915_gem_free_object(struct drm_gem_object *obj);
|
2012-11-15 18:32:30 +07:00
|
|
|
|
2016-12-23 21:57:56 +07:00
|
|
|
static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
|
|
|
|
{
|
2018-02-20 05:06:31 +07:00
|
|
|
if (!atomic_read(&i915->mm.free_count))
|
|
|
|
return;
|
|
|
|
|
2016-12-23 21:57:56 +07:00
|
|
|
/* A single pass should suffice to release all the freed objects (along
|
|
|
|
* most call paths) , but be a little more paranoid in that freeing
|
|
|
|
* the objects does take a little amount of time, during which the rcu
|
|
|
|
* callbacks could have added new objects into the freed list, and
|
|
|
|
* armed the work again.
|
|
|
|
*/
|
|
|
|
do {
|
|
|
|
rcu_barrier();
|
|
|
|
} while (flush_work(&i915->mm.free_work));
|
|
|
|
}
|
|
|
|
|
2017-07-18 20:41:24 +07:00
|
|
|
static inline void i915_gem_drain_workqueue(struct drm_i915_private *i915)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Similar to objects above (see i915_gem_drain_freed-objects), in
|
|
|
|
* general we have workers that are armed by RCU and then rearm
|
|
|
|
* themselves in their callbacks. To be paranoid, we need to
|
|
|
|
* drain the workqueue a second time after waiting for the RCU
|
|
|
|
* grace period so that we catch work queued via RCU from the first
|
|
|
|
* pass. As neither drain_workqueue() nor flush_workqueue() report
|
|
|
|
* a result, we make an assumption that we only don't require more
|
|
|
|
* than 2 passes to catch all recursive RCU delayed work.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
int pass = 2;
|
|
|
|
do {
|
|
|
|
rcu_barrier();
|
|
|
|
drain_workqueue(i915->wq);
|
|
|
|
} while (--pass);
|
|
|
|
}
|
|
|
|
|
2016-08-15 16:49:06 +07:00
|
|
|
struct i915_vma * __must_check
|
2015-03-16 19:11:13 +07:00
|
|
|
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
|
|
|
|
const struct i915_ggtt_view *view,
|
2016-08-04 22:32:23 +07:00
|
|
|
u64 size,
|
2016-08-04 22:32:22 +07:00
|
|
|
u64 alignment,
|
|
|
|
u64 flags);
|
2014-12-11 00:27:58 +07:00
|
|
|
|
2016-08-04 13:52:27 +07:00
|
|
|
int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
|
2010-11-09 02:18:58 +07:00
|
|
|
void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
|
2010-09-24 22:02:42 +07:00
|
|
|
|
2016-10-24 19:42:18 +07:00
|
|
|
void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
|
|
|
|
|
2016-10-28 19:58:35 +07:00
|
|
|
static inline int __sg_page_count(const struct scatterlist *sg)
|
2012-06-01 21:20:22 +07:00
|
|
|
{
|
2015-04-07 22:20:25 +07:00
|
|
|
return sg->length >> PAGE_SHIFT;
|
|
|
|
}
|
2013-02-19 00:28:02 +07:00
|
|
|
|
2016-10-28 19:58:33 +07:00
|
|
|
struct scatterlist *
|
|
|
|
i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
|
|
|
|
unsigned int n, unsigned int *offset);
|
2016-06-10 15:53:00 +07:00
|
|
|
|
2016-10-28 19:58:33 +07:00
|
|
|
struct page *
|
|
|
|
i915_gem_object_get_page(struct drm_i915_gem_object *obj,
|
|
|
|
unsigned int n);
|
2013-02-19 00:28:02 +07:00
|
|
|
|
2016-10-28 19:58:33 +07:00
|
|
|
struct page *
|
|
|
|
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
|
|
|
|
unsigned int n);
|
2013-02-19 00:28:02 +07:00
|
|
|
|
2016-10-28 19:58:33 +07:00
|
|
|
dma_addr_t
|
|
|
|
i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
|
|
|
|
unsigned long n);
|
2015-04-07 22:20:25 +07:00
|
|
|
|
2016-10-28 19:58:36 +07:00
|
|
|
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
|
2017-10-07 05:18:18 +07:00
|
|
|
struct sg_table *pages,
|
2017-10-09 18:00:24 +07:00
|
|
|
unsigned int sg_page_sizes);
|
2016-10-28 19:58:35 +07:00
|
|
|
int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
|
|
|
|
|
|
|
|
static inline int __must_check
|
|
|
|
i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
|
|
|
|
{
|
2016-10-28 19:58:37 +07:00
|
|
|
might_lock(&obj->mm.lock);
|
2016-10-28 19:58:35 +07:00
|
|
|
|
2016-10-28 19:58:37 +07:00
|
|
|
if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
|
2016-10-28 19:58:35 +07:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
return __i915_gem_object_get_pages(obj);
|
|
|
|
}
|
|
|
|
|
2017-10-14 03:26:13 +07:00
|
|
|
static inline bool
|
|
|
|
i915_gem_object_has_pages(struct drm_i915_gem_object *obj)
|
|
|
|
{
|
|
|
|
return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages));
|
|
|
|
}
|
|
|
|
|
2016-10-28 19:58:35 +07:00
|
|
|
static inline void
|
|
|
|
__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
|
2012-09-05 03:02:54 +07:00
|
|
|
{
|
2017-10-14 03:26:13 +07:00
|
|
|
GEM_BUG_ON(!i915_gem_object_has_pages(obj));
|
2016-10-28 19:58:35 +07:00
|
|
|
|
2016-10-28 19:58:37 +07:00
|
|
|
atomic_inc(&obj->mm.pages_pin_count);
|
2016-10-28 19:58:35 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
|
|
|
|
{
|
2016-10-28 19:58:37 +07:00
|
|
|
return atomic_read(&obj->mm.pages_pin_count);
|
2016-10-28 19:58:35 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
|
|
|
|
{
|
2017-10-14 03:26:13 +07:00
|
|
|
GEM_BUG_ON(!i915_gem_object_has_pages(obj));
|
2016-10-28 19:58:35 +07:00
|
|
|
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
|
|
|
|
|
2016-10-28 19:58:37 +07:00
|
|
|
atomic_dec(&obj->mm.pages_pin_count);
|
2012-09-05 03:02:54 +07:00
|
|
|
}
|
2016-04-08 18:11:11 +07:00
|
|
|
|
2016-10-28 19:58:37 +07:00
|
|
|
/*
 * Release a pin on the object's pages. Currently a plain forwarder to
 * __i915_gem_object_unpin_pages().
 */
static inline void
i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
{
	__i915_gem_object_unpin_pages(obj);
}
|
|
|
|
|
2016-11-01 19:11:34 +07:00
|
|
|
/* Lockdep subclasses for obj->mm.lock. */
enum i915_mm_subclass {
	I915_MM_NORMAL = 0,
	I915_MM_SHRINKER,
};
|
|
|
|
|
|
|
|
void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
|
|
|
|
enum i915_mm_subclass subclass);
|
2016-10-28 19:58:36 +07:00
|
|
|
void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
|
2016-10-28 19:58:35 +07:00
|
|
|
|
drm/i915: Support for creating write combined type vmaps
vmaps has a provision for controlling the page protection bits, with which
we can use to control the mapping type, e.g. WB, WC, UC or even WT.
To allow the caller to choose their mapping type, we add a parameter to
i915_gem_object_pin_map - but we still only allow one vmap to be cached
per object. If the object is currently not pinned, then we recreate the
previous vmap with the new access type, but if it was pinned we report an
error. This effectively limits the access via i915_gem_object_pin_map to a
single mapping type for the lifetime of the object. Not usually a problem,
but something to be aware of when setting up the object's vmap.
We will want to vary the access type to enable WC mappings of ringbuffer
and context objects on !llc platforms, as well as other objects where we
need coherent access to the GPU's pages without going through the GTT
v2: Remove the redundant braces around pin count check and fix the marker
in documentation (Chris)
v3:
- Add a new enum for the vmalloc mapping type & pass that as an argument to
i915_object_pin_map. (Tvrtko)
- Use PAGE_MASK to extract or filter the mapping type info and remove a
superfluous BUG_ON.(Tvrtko)
v4:
- Rename the enums and clean up the pin_map function. (Chris)
v5: Drop the VM_NO_GUARD, minor cosmetics.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1471001999-17787-1-git-send-email-chris@chris-wilson.co.uk
2016-08-12 18:39:58 +07:00
|
|
|
enum i915_map_type {
|
|
|
|
I915_MAP_WB = 0,
|
|
|
|
I915_MAP_WC,
|
2017-08-28 17:46:31 +07:00
|
|
|
#define I915_MAP_OVERRIDE BIT(31)
|
|
|
|
I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
|
|
|
|
I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
|
drm/i915: Support for creating write combined type vmaps
vmaps has a provision for controlling the page protection bits, with which
we can use to control the mapping type, e.g. WB, WC, UC or even WT.
To allow the caller to choose their mapping type, we add a parameter to
i915_gem_object_pin_map - but we still only allow one vmap to be cached
per object. If the object is currently not pinned, then we recreate the
previous vmap with the new access type, but if it was pinned we report an
error. This effectively limits the access via i915_gem_object_pin_map to a
single mapping type for the lifetime of the object. Not usually a problem,
but something to be aware of when setting up the object's vmap.
We will want to vary the access type to enable WC mappings of ringbuffer
and context objects on !llc platforms, as well as other objects where we
need coherent access to the GPU's pages without going through the GTT
v2: Remove the redundant braces around pin count check and fix the marker
in documentation (Chris)
v3:
- Add a new enum for the vmalloc mapping type & pass that as an argument to
i915_object_pin_map. (Tvrtko)
- Use PAGE_MASK to extract or filter the mapping type info and remove a
superfluous BUG_ON.(Tvrtko)
v4:
- Rename the enums and clean up the pin_map function. (Chris)
v5: Drop the VM_NO_GUARD, minor cosmetics.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1471001999-17787-1-git-send-email-chris@chris-wilson.co.uk
2016-08-12 18:39:58 +07:00
|
|
|
};
|
|
|
|
|
2016-04-08 18:11:11 +07:00
|
|
|
/**
|
|
|
|
* i915_gem_object_pin_map - return a contiguous mapping of the entire object
|
2016-12-31 18:20:10 +07:00
|
|
|
* @obj: the object to map into kernel address space
|
|
|
|
* @type: the type of mapping, used to select pgprot_t
|
2016-04-08 18:11:11 +07:00
|
|
|
*
|
|
|
|
* Calls i915_gem_object_pin_pages() to prevent reaping of the object's
|
|
|
|
* pages and then returns a contiguous mapping of the backing storage into
|
drm/i915: Support for creating write combined type vmaps
vmaps has a provision for controlling the page protection bits, with which
we can use to control the mapping type, e.g. WB, WC, UC or even WT.
To allow the caller to choose their mapping type, we add a parameter to
i915_gem_object_pin_map - but we still only allow one vmap to be cached
per object. If the object is currently not pinned, then we recreate the
previous vmap with the new access type, but if it was pinned we report an
error. This effectively limits the access via i915_gem_object_pin_map to a
single mapping type for the lifetime of the object. Not usually a problem,
but something to be aware of when setting up the object's vmap.
We will want to vary the access type to enable WC mappings of ringbuffer
and context objects on !llc platforms, as well as other objects where we
need coherent access to the GPU's pages without going through the GTT
v2: Remove the redundant braces around pin count check and fix the marker
in documentation (Chris)
v3:
- Add a new enum for the vmalloc mapping type & pass that as an argument to
i915_object_pin_map. (Tvrtko)
- Use PAGE_MASK to extract or filter the mapping type info and remove a
superfluous BUG_ON.(Tvrtko)
v4:
- Rename the enums and clean up the pin_map function. (Chris)
v5: Drop the VM_NO_GUARD, minor cosmetics.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1471001999-17787-1-git-send-email-chris@chris-wilson.co.uk
2016-08-12 18:39:58 +07:00
|
|
|
* the kernel address space. Based on the @type of mapping, the PTE will be
|
|
|
|
* set to either WriteBack or WriteCombine (via pgprot_t).
|
2016-04-08 18:11:11 +07:00
|
|
|
*
|
2016-10-28 19:58:37 +07:00
|
|
|
* The caller is responsible for calling i915_gem_object_unpin_map() when the
|
|
|
|
* mapping is no longer required.
|
2016-04-08 18:11:11 +07:00
|
|
|
*
|
2016-04-12 20:46:16 +07:00
|
|
|
* Returns the pointer through which to access the mapped object, or an
|
|
|
|
* ERR_PTR() on error.
|
2016-04-08 18:11:11 +07:00
|
|
|
*/
|
drm/i915: Support for creating write combined type vmaps
vmaps has a provision for controlling the page protection bits, with which
we can use to control the mapping type, e.g. WB, WC, UC or even WT.
To allow the caller to choose their mapping type, we add a parameter to
i915_gem_object_pin_map - but we still only allow one vmap to be cached
per object. If the object is currently not pinned, then we recreate the
previous vmap with the new access type, but if it was pinned we report an
error. This effectively limits the access via i915_gem_object_pin_map to a
single mapping type for the lifetime of the object. Not usually a problem,
but something to be aware of when setting up the object's vmap.
We will want to vary the access type to enable WC mappings of ringbuffer
and context objects on !llc platforms, as well as other objects where we
need coherent access to the GPU's pages without going through the GTT
v2: Remove the redundant braces around pin count check and fix the marker
in documentation (Chris)
v3:
- Add a new enum for the vmalloc mapping type & pass that as an argument to
i915_object_pin_map. (Tvrtko)
- Use PAGE_MASK to extract or filter the mapping type info and remove a
superfluous BUG_ON.(Tvrtko)
v4:
- Rename the enums and clean up the pin_map function. (Chris)
v5: Drop the VM_NO_GUARD, minor cosmetics.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1471001999-17787-1-git-send-email-chris@chris-wilson.co.uk
2016-08-12 18:39:58 +07:00
|
|
|
void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
|
|
|
|
enum i915_map_type type);
|
2016-04-08 18:11:11 +07:00
|
|
|
|
|
|
|
/**
 * i915_gem_object_unpin_map - releases an earlier mapping
 * @obj: the object to unmap
 *
 * Counterpart to i915_gem_object_pin_map(): once you have finished
 * accessing the object through its mapping, call this to release the pin
 * held upon that mapping. When the pin count drops to zero the mapping may
 * be removed.
 *
 * Implemented by dropping the page pin via i915_gem_object_unpin_pages().
 */
static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
{
	i915_gem_object_unpin_pages(obj);
}
|
|
|
|
|
2016-08-18 23:16:47 +07:00
|
|
|
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
|
|
|
|
unsigned int *needs_clflush);
|
|
|
|
int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
|
|
|
|
unsigned int *needs_clflush);
|
2017-03-10 07:09:42 +07:00
|
|
|
#define CLFLUSH_BEFORE BIT(0)
|
|
|
|
#define CLFLUSH_AFTER BIT(1)
|
|
|
|
#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
|
2016-08-18 23:16:47 +07:00
|
|
|
|
|
|
|
/*
 * End a shmem access by dropping one page pin on the object.
 * NOTE(review): presumably pairs with the pin taken by
 * i915_gem_obj_prepare_shmem_read()/_write() - confirm in i915_gem.c.
 */
static inline void
i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
{
	i915_gem_object_unpin_pages(obj);
}
|
|
|
|
|
2010-11-26 01:00:26 +07:00
|
|
|
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
|
2011-02-07 09:16:14 +07:00
|
|
|
int i915_gem_dumb_create(struct drm_file *file_priv,
|
|
|
|
struct drm_device *dev,
|
|
|
|
struct drm_mode_create_dumb *args);
|
2014-12-24 10:11:17 +07:00
|
|
|
int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
|
|
|
|
uint32_t handle, uint64_t *offset);
|
2016-08-26 01:05:19 +07:00
|
|
|
int i915_gem_mmap_gtt_version(void);
|
2016-05-20 17:54:06 +07:00
|
|
|
|
|
|
|
void i915_gem_track_fb(struct drm_i915_gem_object *old,
|
|
|
|
struct drm_i915_gem_object *new,
|
|
|
|
unsigned frontbuffer_bits);
|
|
|
|
|
2016-10-28 19:58:46 +07:00
|
|
|
int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
|
2011-12-14 19:57:08 +07:00
|
|
|
|
2018-02-21 16:56:36 +07:00
|
|
|
struct i915_request *
|
2016-03-16 18:00:37 +07:00
|
|
|
i915_gem_find_active_request(struct intel_engine_cs *engine);
|
2014-02-25 22:11:23 +07:00
|
|
|
|
2017-03-17 00:13:02 +07:00
|
|
|
static inline bool i915_reset_backoff(struct i915_gpu_error *error)
|
|
|
|
{
|
|
|
|
return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool i915_reset_handoff(struct i915_gpu_error *error)
|
2012-11-15 23:17:22 +07:00
|
|
|
{
|
2017-03-17 00:13:02 +07:00
|
|
|
return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags));
|
2016-04-13 23:35:03 +07:00
|
|
|
}
|
|
|
|
|
2016-09-09 20:11:47 +07:00
|
|
|
static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
|
2016-04-13 23:35:03 +07:00
|
|
|
{
|
2016-09-09 20:11:47 +07:00
|
|
|
return unlikely(test_bit(I915_WEDGED, &error->flags));
|
2012-11-15 23:17:22 +07:00
|
|
|
}
|
|
|
|
|
2017-03-17 00:13:02 +07:00
|
|
|
static inline bool i915_reset_backoff_or_wedged(struct i915_gpu_error *error)
|
2012-11-15 23:17:22 +07:00
|
|
|
{
|
2017-03-17 00:13:02 +07:00
|
|
|
return i915_reset_backoff(error) | i915_terminally_wedged(error);
|
2013-11-12 19:44:19 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline u32 i915_reset_count(struct i915_gpu_error *error)
|
|
|
|
{
|
2016-09-09 20:11:47 +07:00
|
|
|
return READ_ONCE(error->reset_count);
|
2012-11-15 23:17:22 +07:00
|
|
|
}
|
2012-02-15 18:25:36 +07:00
|
|
|
|
2017-06-20 16:57:48 +07:00
|
|
|
static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
|
|
|
|
struct intel_engine_cs *engine)
|
|
|
|
{
|
|
|
|
return READ_ONCE(error->reset_engine_count[engine->id]);
|
|
|
|
}
|
|
|
|
|
2018-02-21 16:56:36 +07:00
|
|
|
struct i915_request *
|
drm/i915: Add support for per engine reset recovery
This change implements support for per-engine reset as an initial, less
intrusive hang recovery option to be attempted before falling back to the
legacy full GPU reset recovery mode if necessary. This is only supported
from Gen8 onwards.
Hangchecker determines which engines are hung and invokes error handler to
recover from it. Error handler schedules recovery for each of those engines
that are hung. The recovery procedure is as follows,
- identifies the request that caused the hang and it is dropped
- force engine to idle: this is done by issuing a reset request
- reset the engine
- re-init the engine to resume submissions.
If engine reset fails then we fall back to heavy weight full gpu reset
which resets all engines and reinitializes the complete state of HW and SW.
v2: Rebase.
v3: s/*engine_reset*/*reset_engine*/; freeze engine and irqs before
calling i915_gem_reset_engine (Chris).
v4: Rebase, modify i915_gem_reset_prepare to use a ring mask and
reuse the function for reset_engine.
v5: intel_reset_engine_start/cancel instead of request/unrequest_reset.
v6: Clean up reset_engine function to not require mutex, i.e. no need to call
revoke/restore_fences and _retire_requests (Chris).
v7: Remove leftovers from v5, i.e. no need to disable irq, hold
forcewake or wakeup the handoff bit (Chris).
v8: engine_retire_requests should be (and it was) static; explain that
we have to re-init the engine after reset, which is why the init_hw call
is needed; check reset-in-progress flag (Chris).
v9: Rebase, include code to pass the active request to gem_reset_engine
(as it is already done in full reset). Remove unnecessary
intel_reset_engine_start/cancel, these are executed as part of the
reset.
v10: Rebase, use the right I915_RESET_ENGINE flag.
v11: Fixup to call reset_finish_engine even on error.
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Tomas Elf <tomas.elf@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170615201828.23144-6-michel.thierry@intel.com
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170620095751.13127-6-chris@chris-wilson.co.uk
2017-06-20 16:57:47 +07:00
|
|
|
i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
|
2017-01-17 22:59:06 +07:00
|
|
|
int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
|
2018-04-07 05:03:54 +07:00
|
|
|
void i915_gem_reset(struct drm_i915_private *dev_priv,
|
|
|
|
unsigned int stalled_mask);
|
drm/i915: Add support for per engine reset recovery
This change implements support for per-engine reset as an initial, less
intrusive hang recovery option to be attempted before falling back to the
legacy full GPU reset recovery mode if necessary. This is only supported
from Gen8 onwards.
Hangchecker determines which engines are hung and invokes error handler to
recover from it. Error handler schedules recovery for each of those engines
that are hung. The recovery procedure is as follows,
- identifies the request that caused the hang and it is dropped
- force engine to idle: this is done by issuing a reset request
- reset the engine
- re-init the engine to resume submissions.
If engine reset fails then we fall back to heavy weight full gpu reset
which resets all engines and reinitializes the complete state of HW and SW.
v2: Rebase.
v3: s/*engine_reset*/*reset_engine*/; freeze engine and irqs before
calling i915_gem_reset_engine (Chris).
v4: Rebase, modify i915_gem_reset_prepare to use a ring mask and
reuse the function for reset_engine.
v5: intel_reset_engine_start/cancel instead of request/unrequest_reset.
v6: Clean up reset_engine function to not require mutex, i.e. no need to call
revoke/restore_fences and _retire_requests (Chris).
v7: Remove leftovers from v5, i.e. no need to disable irq, hold
forcewake or wakeup the handoff bit (Chris).
v8: engine_retire_requests should be (and it was) static; explain that
we have to re-init the engine after reset, which is why the init_hw call
is needed; check reset-in-progress flag (Chris).
v9: Rebase, include code to pass the active request to gem_reset_engine
(as it is already done in full reset). Remove unnecessary
intel_reset_engine_start/cancel, these are executed as part of the
reset.
v10: Rebase, use the right I915_RESET_ENGINE flag.
v11: Fixup to call reset_finish_engine even on error.
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Tomas Elf <tomas.elf@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170615201828.23144-6-michel.thierry@intel.com
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170620095751.13127-6-chris@chris-wilson.co.uk
2017-06-20 16:57:47 +07:00
|
|
|
void i915_gem_reset_finish_engine(struct intel_engine_cs *engine);
|
2017-01-04 21:51:10 +07:00
|
|
|
void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
|
2016-09-09 20:11:53 +07:00
|
|
|
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
|
2017-03-17 00:13:04 +07:00
|
|
|
bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
|
drm/i915: Add support for per engine reset recovery
This change implements support for per-engine reset as an initial, less
intrusive hang recovery option to be attempted before falling back to the
legacy full GPU reset recovery mode if necessary. This is only supported
from Gen8 onwards.
Hangchecker determines which engines are hung and invokes error handler to
recover from it. Error handler schedules recovery for each of those engines
that are hung. The recovery procedure is as follows,
- identifies the request that caused the hang and it is dropped
- force engine to idle: this is done by issuing a reset request
- reset the engine
- re-init the engine to resume submissions.
If engine reset fails then we fall back to heavy weight full gpu reset
which resets all engines and reinitializes the complete state of HW and SW.
v2: Rebase.
v3: s/*engine_reset*/*reset_engine*/; freeze engine and irqs before
calling i915_gem_reset_engine (Chris).
v4: Rebase, modify i915_gem_reset_prepare to use a ring mask and
reuse the function for reset_engine.
v5: intel_reset_engine_start/cancel instead of request/unrequest_reset.
v6: Clean up reset_engine function to not require mutex, i.e. no need to call
revoke/restore_fences and _retire_requests (Chris).
v7: Remove leftovers from v5, i.e. no need to disable irq, hold
forcewake or wakeup the handoff bit (Chris).
v8: engine_retire_requests should be (and it was) static; explain that
we have to re-init the engine after reset, which is why the init_hw call
is needed; check reset-in-progress flag (Chris).
v9: Rebase, include code to pass the active request to gem_reset_engine
(as it is already done in full reset). Remove unnecessary
intel_reset_engine_start/cancel, these are executed as part of the
reset.
v10: Rebase, use the right I915_RESET_ENGINE flag.
v11: Fixup to call reset_finish_engine even on error.
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Tomas Elf <tomas.elf@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170615201828.23144-6-michel.thierry@intel.com
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170620095751.13127-6-chris@chris-wilson.co.uk
2017-06-20 16:57:47 +07:00
|
|
|
void i915_gem_reset_engine(struct intel_engine_cs *engine,
|
2018-04-07 05:03:53 +07:00
|
|
|
struct i915_request *request,
|
|
|
|
bool stalled);
|
2017-02-22 18:40:48 +07:00
|
|
|
|
2017-01-24 18:01:35 +07:00
|
|
|
void i915_gem_init_mmio(struct drm_i915_private *i915);
|
2016-12-01 21:16:38 +07:00
|
|
|
int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
|
|
|
|
int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
|
2016-11-16 15:55:31 +07:00
|
|
|
void i915_gem_init_swizzling(struct drm_i915_private *dev_priv);
|
2018-06-04 16:00:32 +07:00
|
|
|
void i915_gem_fini(struct drm_i915_private *dev_priv);
|
2016-12-01 21:16:39 +07:00
|
|
|
void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
|
2017-02-14 00:15:58 +07:00
|
|
|
int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
|
2018-07-09 19:20:42 +07:00
|
|
|
unsigned int flags, long timeout);
|
2016-12-01 21:16:38 +07:00
|
|
|
int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
|
2018-05-31 15:22:46 +07:00
|
|
|
void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
|
2016-12-01 21:16:38 +07:00
|
|
|
void i915_gem_resume(struct drm_i915_private *dev_priv);
|
2018-06-07 04:45:20 +07:00
|
|
|
vm_fault_t i915_gem_fault(struct vm_fault *vmf);
|
2016-10-28 19:58:27 +07:00
|
|
|
int i915_gem_object_wait(struct drm_i915_gem_object *obj,
|
|
|
|
unsigned int flags,
|
|
|
|
long timeout,
|
|
|
|
struct intel_rps_client *rps);
|
2016-11-15 03:41:05 +07:00
|
|
|
int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
|
|
|
|
unsigned int flags,
|
2018-04-19 01:40:52 +07:00
|
|
|
const struct i915_sched_attr *attr);
|
2016-11-15 03:41:05 +07:00
|
|
|
#define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX
|
|
|
|
|
2015-04-27 19:41:14 +07:00
|
|
|
int __must_check
|
2017-04-12 18:01:11 +07:00
|
|
|
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
|
|
|
|
int __must_check
|
|
|
|
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
|
2010-11-23 22:26:33 +07:00
|
|
|
int __must_check
|
2012-03-26 15:10:27 +07:00
|
|
|
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
|
2016-08-15 16:49:06 +07:00
|
|
|
struct i915_vma * __must_check
|
2011-04-14 15:41:17 +07:00
|
|
|
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
|
|
|
|
u32 alignment,
|
2018-02-20 20:42:06 +07:00
|
|
|
const struct i915_ggtt_view *view,
|
|
|
|
unsigned int flags);
|
2016-08-15 16:49:06 +07:00
|
|
|
void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
|
2014-05-21 18:42:56 +07:00
|
|
|
int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
|
2010-08-07 17:01:39 +07:00
|
|
|
int align);
|
2017-06-20 18:05:45 +07:00
|
|
|
int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
|
2010-11-09 02:18:58 +07:00
|
|
|
void i915_gem_release(struct drm_device *dev, struct drm_file *file);
|
2008-07-31 02:06:12 +07:00
|
|
|
|
2011-04-04 15:44:39 +07:00
|
|
|
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
|
|
|
|
enum i915_cache_level cache_level);
|
|
|
|
|
2012-05-10 20:25:09 +07:00
|
|
|
struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
|
|
|
|
struct dma_buf *dma_buf);
|
|
|
|
|
|
|
|
struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
|
|
|
|
struct drm_gem_object *gem_obj, int flags);
|
|
|
|
|
2014-08-06 20:04:48 +07:00
|
|
|
static inline struct i915_hw_ppgtt *
|
|
|
|
i915_vm_to_ppgtt(struct i915_address_space *vm)
|
|
|
|
{
|
2018-06-05 22:37:58 +07:00
|
|
|
return container_of(vm, struct i915_hw_ppgtt, vm);
|
2014-08-06 20:04:48 +07:00
|
|
|
}
|
|
|
|
|
2016-11-11 17:43:54 +07:00
|
|
|
/* i915_gem_fence_reg.c */
|
2017-09-04 15:01:01 +07:00
|
|
|
struct drm_i915_fence_reg *
|
|
|
|
i915_reserve_fence(struct drm_i915_private *dev_priv);
|
|
|
|
void i915_unreserve_fence(struct drm_i915_fence_reg *fence);
|
2016-08-18 23:17:00 +07:00
|
|
|
|
2017-01-04 21:51:10 +07:00
|
|
|
void i915_gem_revoke_fences(struct drm_i915_private *dev_priv);
|
2016-11-16 15:55:33 +07:00
|
|
|
void i915_gem_restore_fences(struct drm_i915_private *dev_priv);
|
2015-07-24 18:55:11 +07:00
|
|
|
|
2016-11-16 15:55:33 +07:00
|
|
|
void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv);
|
2016-10-28 19:58:36 +07:00
|
|
|
void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
|
|
|
|
struct sg_table *pages);
|
|
|
|
void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
|
|
|
|
struct sg_table *pages);
|
2015-07-24 22:40:14 +07:00
|
|
|
|
2017-06-20 18:05:47 +07:00
|
|
|
static inline struct i915_gem_context *
|
|
|
|
__i915_gem_context_lookup_rcu(struct drm_i915_file_private *file_priv, u32 id)
|
|
|
|
{
|
|
|
|
return idr_find(&file_priv->context_idr, id);
|
|
|
|
}
|
|
|
|
|
2016-05-24 20:53:36 +07:00
|
|
|
static inline struct i915_gem_context *
|
|
|
|
i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id)
|
|
|
|
{
|
|
|
|
struct i915_gem_context *ctx;
|
|
|
|
|
2017-06-20 18:05:47 +07:00
|
|
|
rcu_read_lock();
|
|
|
|
ctx = __i915_gem_context_lookup_rcu(file_priv, id);
|
|
|
|
if (ctx && !kref_get_unless_zero(&ctx->ref))
|
|
|
|
ctx = NULL;
|
|
|
|
rcu_read_unlock();
|
2016-05-24 20:53:36 +07:00
|
|
|
|
|
|
|
return ctx;
|
|
|
|
}
|
|
|
|
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
int i915_perf_open_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file);
|
2017-08-04 00:05:50 +07:00
|
|
|
int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file);
|
|
|
|
int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file);
|
2017-06-13 18:23:03 +07:00
|
|
|
void i915_oa_init_reg_state(struct intel_engine_cs *engine,
|
|
|
|
struct i915_gem_context *ctx,
|
|
|
|
uint32_t *reg_state);
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
|
2013-12-07 05:11:23 +07:00
|
|
|
/* i915_gem_evict.c */
|
2016-08-04 22:32:18 +07:00
|
|
|
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
|
2016-08-04 22:32:22 +07:00
|
|
|
u64 min_size, u64 alignment,
|
2013-12-07 05:11:23 +07:00
|
|
|
unsigned cache_level,
|
2016-08-04 22:32:22 +07:00
|
|
|
u64 start, u64 end,
|
2014-02-14 20:01:11 +07:00
|
|
|
unsigned flags);
|
2017-01-11 18:23:11 +07:00
|
|
|
int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
|
|
|
|
struct drm_mm_node *node,
|
|
|
|
unsigned int flags);
|
drm/i915: Eliminate lots of iterations over the execobjects array
The major scaling bottleneck in execbuffer is the processing of the
execobjects. Creating an auxiliary list is inefficient when compared to
using the execobject array we already have allocated.
Reservation is then split into phases. As we look up the VMA, we
try and bind it back into active location. Only if that fails, do we add
it to the unbound list for phase 2. In phase 2, we try and add all those
objects that could not fit into their previous location, with fallback
to retrying all objects and evicting the VM in case of severe
fragmentation. (This is the same as before, except that phase 1 is now
done inline with looking up the VMA to avoid an iteration over the
execobject array. In the ideal case, we eliminate the separate reservation
phase). During the reservation phase, we only evict from the VM between
passes (rather than currently as we try to fit every new VMA). In
testing with Unreal Engine's Atlantis demo which stresses the eviction
logic on gen7 class hardware, this sped up the framerate by a factor of
2.
The second loop amalgamation is between move_to_gpu and move_to_active.
As we always submit the request, even if incomplete, we can use the
current request to track active VMA as we perform the flushes and
synchronisation required.
The next big advancement is to avoid copying back to the user any
execobjects and relocations that are not changed.
v2: Add a Theory of Operation spiel.
v3: Fall back to slow relocations in preparation for flushing userptrs.
v4: Document struct members, factor out eb_validate_vma(), add a few
more comments to explain some magic and hide other magic behind macros.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
2017-06-16 21:05:19 +07:00
|
|
|
int i915_gem_evict_vm(struct i915_address_space *vm);
|
2012-02-09 23:15:46 +07:00
|
|
|
|
2017-12-06 19:49:14 +07:00
|
|
|
void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv);
|
|
|
|
|
drm/i915: Split out GTT specific header file
This file contains all necessary defines, prototypes and typedefs for
manipulating GEN graphics address translation (this does not include the
legacy AGP driver)
Reiterating the comment in the header,
"Please try to maintain the following order within this file unless it
makes sense to do otherwise. From top to bottom:
1. typedefs
2. #defines, and macros
3. structure definitions
4. function prototypes
Within each section, please try to order by generation in ascending
order, from top to bottom (ie. GEN6 on the top, GEN8 on the bottom)."
I've made some minor cleanups, and fixed a couple of typos while here -
but there should be no functional changes.
The purpose of the patch is to reduce clutter in our main header file,
making room for new growth, and make documentation of our interfaces
easier by splitting things out.
With a little more work, like making i915_gtt a pointer, we could
potentially completely isolate this header from i915_drv.h. At the
moment however, I don't think it's worth the effort.
Personally, I would have liked to put the PTE encoding functions in this
file too, but I didn't want to rock the boat too much.
A similar patch has been in use on my machine for some time. This exact
patch though has only been compile tested.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-03-23 12:47:21 +07:00
|
|
|
/* belongs in i915_gem_gtt.h */
|
2016-05-06 21:40:21 +07:00
|
|
|
static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv)
|
2012-11-05 00:21:27 +07:00
|
|
|
{
|
2016-08-18 23:16:40 +07:00
|
|
|
wmb();
|
2016-05-06 21:40:21 +07:00
|
|
|
if (INTEL_GEN(dev_priv) < 6)
|
2012-11-05 00:21:27 +07:00
|
|
|
intel_gtt_chipset_flush();
|
|
|
|
}
|
2013-12-07 05:11:14 +07:00
|
|
|
|
2012-04-24 21:47:39 +07:00
|
|
|
/* i915_gem_stolen.c */
|
2015-07-03 05:25:07 +07:00
|
|
|
int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
|
|
|
|
struct drm_mm_node *node, u64 size,
|
|
|
|
unsigned alignment);
|
2015-09-15 01:19:57 +07:00
|
|
|
int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
|
|
|
|
struct drm_mm_node *node, u64 size,
|
|
|
|
unsigned alignment, u64 start,
|
|
|
|
u64 end);
|
2015-07-03 05:25:07 +07:00
|
|
|
void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
|
|
|
|
struct drm_mm_node *node);
|
2016-11-16 15:55:35 +07:00
|
|
|
int i915_gem_init_stolen(struct drm_i915_private *dev_priv);
|
2012-04-24 21:47:39 +07:00
|
|
|
void i915_gem_cleanup_stolen(struct drm_device *dev);
|
2012-11-15 18:32:26 +07:00
|
|
|
struct drm_i915_gem_object *
|
2017-12-11 22:18:22 +07:00
|
|
|
i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
|
|
|
|
resource_size_t size);
|
2013-02-20 04:31:37 +07:00
|
|
|
struct drm_i915_gem_object *
|
2016-12-01 21:16:36 +07:00
|
|
|
i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv,
|
2017-12-11 22:18:22 +07:00
|
|
|
resource_size_t stolen_offset,
|
|
|
|
resource_size_t gtt_offset,
|
|
|
|
resource_size_t size);
|
2012-04-24 21:47:39 +07:00
|
|
|
|
2016-10-28 19:58:30 +07:00
|
|
|
/* i915_gem_internal.c */
|
|
|
|
struct drm_i915_gem_object *
|
|
|
|
i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
|
2017-01-12 20:04:31 +07:00
|
|
|
phys_addr_t size);
|
2016-10-28 19:58:30 +07:00
|
|
|
|
2015-03-18 16:46:04 +07:00
|
|
|
/* i915_gem_shrinker.c */
|
2017-11-23 18:53:38 +07:00
|
|
|
unsigned long i915_gem_shrink(struct drm_i915_private *i915,
|
2015-10-01 18:18:25 +07:00
|
|
|
unsigned long target,
|
2017-09-07 06:19:30 +07:00
|
|
|
unsigned long *nr_scanned,
|
2015-03-18 16:46:04 +07:00
|
|
|
unsigned flags);
|
|
|
|
#define I915_SHRINK_PURGEABLE 0x1
|
|
|
|
#define I915_SHRINK_UNBOUND 0x2
|
|
|
|
#define I915_SHRINK_BOUND 0x4
|
2015-10-01 18:18:29 +07:00
|
|
|
#define I915_SHRINK_ACTIVE 0x8
|
2016-04-08 18:11:12 +07:00
|
|
|
#define I915_SHRINK_VMAPS 0x10
|
2017-11-23 18:53:38 +07:00
|
|
|
unsigned long i915_gem_shrink_all(struct drm_i915_private *i915);
|
|
|
|
void i915_gem_shrinker_register(struct drm_i915_private *i915);
|
|
|
|
void i915_gem_shrinker_unregister(struct drm_i915_private *i915);
|
2018-07-11 14:36:02 +07:00
|
|
|
void i915_gem_shrinker_taints_mutex(struct mutex *mutex);
|
2015-03-18 16:46:04 +07:00
|
|
|
|
2008-07-31 02:06:12 +07:00
|
|
|
/* i915_gem_tiling.c */
|
2013-08-02 00:39:55 +07:00
|
|
|
static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
|
2012-12-04 04:03:14 +07:00
|
|
|
{
|
2016-06-24 20:00:21 +07:00
|
|
|
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
|
2012-12-04 04:03:14 +07:00
|
|
|
|
|
|
|
return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
|
2016-08-05 16:14:23 +07:00
|
|
|
i915_gem_object_is_tiled(obj);
|
2012-12-04 04:03:14 +07:00
|
|
|
}
|
|
|
|
|
2017-01-09 23:16:13 +07:00
|
|
|
u32 i915_gem_fence_size(struct drm_i915_private *dev_priv, u32 size,
|
|
|
|
unsigned int tiling, unsigned int stride);
|
|
|
|
u32 i915_gem_fence_alignment(struct drm_i915_private *dev_priv, u32 size,
|
|
|
|
unsigned int tiling, unsigned int stride);
|
|
|
|
|
2009-02-18 08:08:50 +07:00
|
|
|
/* i915_debugfs.c */
|
2013-10-16 16:49:58 +07:00
|
|
|
#ifdef CONFIG_DEBUG_FS
|
2016-06-24 20:00:17 +07:00
|
|
|
int i915_debugfs_register(struct drm_i915_private *dev_priv);
|
2015-04-10 20:59:32 +07:00
|
|
|
int i915_debugfs_connector_add(struct drm_connector *connector);
|
drm/i915: debugfs spring cleaning
Just like with sysfs, we do some major overhaul.
Pass dev_priv instead of dev to all feature macros (IS_, HAS_,
INTEL_, etc.). This has the side effect that a bunch of functions
now get dev_priv passed instead of dev.
All calls to INTEL_INFO()->gen have been replaced with
INTEL_GEN().
We want access to to_i915(node->minor->dev) in a lot of places,
so add the node_to_i915() helper to accommodate for this.
Finally, we have quite a few cases where we get a void * pointer,
and need to cast it to drm_device *, only to run to_i915() on it.
Add cast_to_i915() to do this.
v2: Don't introduce extra dev (Chris)
v3: Make pipe_crc_info have a pointer to drm_i915_private instead of
drm_device. This saves a bit of space, since we never use
drm_device anywhere in these functions.
Also some minor fixup that I missed in the previous version.
v4: Changed the code a bit so that dev_priv is passed directly
to various functions, thus removing the need for the
cast_to_i915() helper. Also did some additional cleanup.
v5: Additional cleanup of newly introduced changes.
v6: Rebase again because of conflict.
Signed-off-by: David Weinehall <david.weinehall@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160822105931.pcbe2lpsgzckzboa@boom
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
2016-08-22 17:59:31 +07:00
|
|
|
void intel_display_crc_init(struct drm_i915_private *dev_priv);
|
2013-10-16 00:55:40 +07:00
|
|
|
#else
|
2016-07-12 18:55:29 +07:00
|
|
|
static inline int i915_debugfs_register(struct drm_i915_private *dev_priv) {return 0;}
|
2015-07-13 14:23:19 +07:00
|
|
|
static inline int i915_debugfs_connector_add(struct drm_connector *connector)
|
|
|
|
{ return 0; }
|
2016-08-25 16:07:01 +07:00
|
|
|
static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
|
2013-10-16 00:55:40 +07:00
|
|
|
#endif
|
2013-07-12 20:50:57 +07:00
|
|
|
|
2014-08-22 20:41:39 +07:00
|
|
|
const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
|
2009-02-18 08:08:50 +07:00
|
|
|
|
2014-02-19 01:15:46 +07:00
|
|
|
/* i915_cmd_parser.c */
|
2016-05-04 20:25:36 +07:00
|
|
|
int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
|
2016-08-18 23:17:10 +07:00
|
|
|
void intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
|
2016-07-27 15:07:26 +07:00
|
|
|
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
|
|
|
|
int intel_engine_cmd_parser(struct intel_engine_cs *engine,
|
|
|
|
struct drm_i915_gem_object *batch_obj,
|
|
|
|
struct drm_i915_gem_object *shadow_batch_obj,
|
|
|
|
u32 batch_start_offset,
|
|
|
|
u32 batch_len,
|
|
|
|
bool is_master);
|
2014-02-19 01:15:46 +07:00
|
|
|
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
/* i915_perf.c */
|
|
|
|
extern void i915_perf_init(struct drm_i915_private *dev_priv);
|
|
|
|
extern void i915_perf_fini(struct drm_i915_private *dev_priv);
|
2016-11-08 02:49:53 +07:00
|
|
|
extern void i915_perf_register(struct drm_i915_private *dev_priv);
|
|
|
|
extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
|
drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
2016-11-08 02:49:47 +07:00
|
|
|
|
2008-08-26 05:11:06 +07:00
|
|
|
/* i915_suspend.c */
|
2016-12-01 21:16:44 +07:00
|
|
|
extern int i915_save_state(struct drm_i915_private *dev_priv);
|
|
|
|
extern int i915_restore_state(struct drm_i915_private *dev_priv);
|
2008-10-01 02:14:26 +07:00
|
|
|
|
2012-04-11 11:17:01 +07:00
|
|
|
/* i915_sysfs.c */
|
2016-08-22 17:32:43 +07:00
|
|
|
void i915_setup_sysfs(struct drm_i915_private *dev_priv);
|
|
|
|
void i915_teardown_sysfs(struct drm_i915_private *dev_priv);
|
2012-04-11 11:17:01 +07:00
|
|
|
|
2017-01-25 05:57:49 +07:00
|
|
|
/* intel_lpe_audio.c */
|
|
|
|
int intel_lpe_audio_init(struct drm_i915_private *dev_priv);
|
|
|
|
void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv);
|
|
|
|
void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv);
|
2017-01-25 05:57:50 +07:00
|
|
|
void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
|
2017-04-27 23:02:26 +07:00
|
|
|
enum pipe pipe, enum port port,
|
|
|
|
const void *eld, int ls_clock, bool dp_output);
|
2017-01-25 05:57:49 +07:00
|
|
|
|
2010-07-21 05:44:45 +07:00
|
|
|
/* intel_i2c.c */
|
2016-12-01 21:16:42 +07:00
|
|
|
extern int intel_setup_gmbus(struct drm_i915_private *dev_priv);
|
|
|
|
extern void intel_teardown_gmbus(struct drm_i915_private *dev_priv);
|
2015-03-27 05:20:22 +07:00
|
|
|
extern bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv,
|
|
|
|
unsigned int pin);
|
2018-01-09 02:55:41 +07:00
|
|
|
extern int intel_gmbus_output_aksv(struct i2c_adapter *adapter);
|
2012-03-28 01:36:14 +07:00
|
|
|
|
2015-03-27 05:20:20 +07:00
|
|
|
extern struct i2c_adapter *
|
|
|
|
intel_gmbus_get_adapter(struct drm_i915_private *dev_priv, unsigned int pin);
|
2010-09-24 18:52:03 +07:00
|
|
|
extern void intel_gmbus_set_speed(struct i2c_adapter *adapter, int speed);
|
|
|
|
extern void intel_gmbus_force_bit(struct i2c_adapter *adapter, bool force_bit);
|
2013-05-06 19:52:08 +07:00
|
|
|
static inline bool intel_gmbus_is_forced_bit(struct i2c_adapter *adapter)
|
2010-09-28 22:41:32 +07:00
|
|
|
{
|
|
|
|
return container_of(adapter, struct intel_gmbus, adapter)->force_bit;
|
|
|
|
}
|
2016-12-01 21:16:44 +07:00
|
|
|
extern void intel_i2c_reset(struct drm_i915_private *dev_priv);
|
2010-07-21 05:44:45 +07:00
|
|
|
|
2015-12-14 17:50:49 +07:00
|
|
|
/* intel_bios.c */
|
2017-03-10 20:27:57 +07:00
|
|
|
void intel_bios_init(struct drm_i915_private *dev_priv);
|
2018-02-14 15:21:49 +07:00
|
|
|
void intel_bios_cleanup(struct drm_i915_private *dev_priv);
|
2015-12-15 18:16:15 +07:00
|
|
|
bool intel_bios_is_valid_vbt(const void *buf, size_t size);
|
2016-03-16 17:43:29 +07:00
|
|
|
bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv);
|
2016-03-16 17:43:30 +07:00
|
|
|
bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin);
|
2016-06-03 16:17:43 +07:00
|
|
|
bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port);
|
2016-03-16 17:43:31 +07:00
|
|
|
bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port);
|
2016-05-04 18:45:22 +07:00
|
|
|
bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port);
|
2016-03-16 17:43:32 +07:00
|
|
|
bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv, enum port *port);
|
2016-03-31 17:41:47 +07:00
|
|
|
bool intel_bios_is_port_hpd_inverted(struct drm_i915_private *dev_priv,
|
|
|
|
enum port port);
|
2016-10-14 21:26:50 +07:00
|
|
|
bool intel_bios_is_lspcon_present(struct drm_i915_private *dev_priv,
|
|
|
|
enum port port);
|
|
|
|
|
2010-10-08 06:01:13 +07:00
|
|
|
/* intel_acpi.c */
|
|
|
|
#ifdef CONFIG_ACPI
|
|
|
|
extern void intel_register_dsm_handler(void);
|
|
|
|
extern void intel_unregister_dsm_handler(void);
|
|
|
|
#else
|
|
|
|
static inline void intel_register_dsm_handler(void) { return; }
|
|
|
|
static inline void intel_unregister_dsm_handler(void) { return; }
|
|
|
|
#endif /* CONFIG_ACPI */
|
|
|
|
|
2016-07-05 16:40:20 +07:00
|
|
|
/* intel_device_info.c */
|
|
|
|
static inline struct intel_device_info *
|
|
|
|
mkwrite_device_info(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
|
|
|
return (struct intel_device_info *)&dev_priv->info;
|
|
|
|
}
|
|
|
|
|
DRM: i915: add mode setting support
This commit adds i915 driver support for the DRM mode setting APIs.
Currently, VGA, LVDS, SDVO DVI & VGA, TV and DVO LVDS outputs are
supported. HDMI, DisplayPort and additional SDVO output support will
follow.
Support for the mode setting code is controlled by the new 'modeset'
module option. A new config option, CONFIG_DRM_I915_KMS controls the
default behavior, and whether a PCI ID list is built into the module for
use by user level module utilities.
Note that if mode setting is enabled, user level drivers that access
display registers directly or that don't use the kernel graphics memory
manager will likely corrupt kernel graphics memory, disrupt output
configuration (possibly leading to hangs and/or blank displays), and
prevent panic/oops messages from appearing. So use caution when
enabling this code; be sure your user level code supports the new
interfaces.
A new SysRq key, 'g', provides emergency support for switching back to
the kernel's framebuffer console; which is useful for testing.
Co-authors: Dave Airlie <airlied@linux.ie>, Hong Liu <hong.liu@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2008-11-08 05:24:08 +07:00
|
|
|
/* modesetting */
|
2012-04-10 20:50:11 +07:00
|
|
|
extern void intel_modeset_init_hw(struct drm_device *dev);
|
2016-10-25 22:58:02 +07:00
|
|
|
extern int intel_modeset_init(struct drm_device *dev);
|
DRM: i915: add mode setting support
This commit adds i915 driver support for the DRM mode setting APIs.
Currently, VGA, LVDS, SDVO DVI & VGA, TV and DVO LVDS outputs are
supported. HDMI, DisplayPort and additional SDVO output support will
follow.
Support for the mode setting code is controlled by the new 'modeset'
module option. A new config option, CONFIG_DRM_I915_KMS controls the
default behavior, and whether a PCI ID list is built into the module for
use by user level module utilities.
Note that if mode setting is enabled, user level drivers that access
display registers directly or that don't use the kernel graphics memory
manager will likely corrupt kernel graphics memory, disrupt output
configuration (possibly leading to hangs and/or blank displays), and
prevent panic/oops messages from appearing. So use caution when
enabling this code; be sure your user level code supports the new
interfaces.
A new SysRq key, 'g', provides emergency support for switching back to
the kernel's framebuffer console; which is useful for testing.
Co-authors: Dave Airlie <airlied@linux.ie>, Hong Liu <hong.liu@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2008-11-08 05:24:08 +07:00
|
|
|
extern void intel_modeset_cleanup(struct drm_device *dev);
|
2016-06-24 20:00:15 +07:00
|
|
|
extern int intel_connector_register(struct drm_connector *);
|
2016-06-17 17:40:33 +07:00
|
|
|
extern void intel_connector_unregister(struct drm_connector *);
|
2016-11-16 19:32:42 +07:00
|
|
|
extern int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv,
|
|
|
|
bool state);
|
2015-07-13 21:30:25 +07:00
|
|
|
extern void intel_display_resume(struct drm_device *dev);
|
2016-11-16 15:55:39 +07:00
|
|
|
extern void i915_redisable_vga(struct drm_i915_private *dev_priv);
|
|
|
|
extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
|
2016-05-06 20:48:28 +07:00
|
|
|
extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
|
2016-11-23 21:21:44 +07:00
|
|
|
extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
|
2017-01-26 17:19:19 +07:00
|
|
|
extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
|
drm/i915: Interactive RPS mode
RPS provides a feedback loop where we use the load during the previous
evaluation interval to decide whether to up or down clock the GPU
frequency. Our responsiveness is split into 3 regimes, a high and low
plateau with the intent to keep the gpu clocked high to cover occasional
stalls under high load, and low despite occasional glitches under steady
low load, and inbetween. However, we run into situations like kodi where
we want to stay at low power (video decoding is done efficiently
inside the fixed function HW and doesn't need high clocks even for high
bitrate streams), but just occasionally the pipeline is more complex
than a video decode and we need a smidgen of extra GPU power to present
on time. In the high power regime, we sample at sub frame intervals with
a bias to upclocking, and conversely at low power we sample over a few
frames worth to provide what we consider to be the right levels of
responsiveness respectively. At low power, we more or less expect to be
kicked out to high power at the start of a busy sequence by waitboosting.
Prior to commit e9af4ea2b9e7 ("drm/i915: Avoid waitboosting on the active
request") whenever we missed the frame or stalled, we would immediately go
full throttle and upclock the GPU to max. But in commit e9af4ea2b9e7, we
relaxed the waitboosting to only apply if the pipeline was deep to avoid
over-committing resources for a near miss. Sadly though, a near miss is
still a miss, and perceptible as jitter in the frame delivery.
To try and prevent the near miss before having to resort to boosting
after the fact, we use the pageflip queue as an indication that we are
in an "interactive" regime and so should sample the load more frequently
to provide power before the frame misses its vblank. This will make us
more favorable to providing a small power increase (one or two bins) as
required rather than going all the way to maximum and then having to
work back down again. (We still keep the waitboosting mechanism around
just in case a dramatic change in system load requires urgent upclocking,
faster than we can provide in a few evaluation intervals.)
v2: Reduce rps_set_interactive to a boolean parameter to avoid the
confusion of what if they wanted a new power mode after pinning to a
different mode (which to choose?)
v3: Only reprogram RPS while the GT is awake, it will be set when we
wake the GT, and while off warns about being used outside of rpm.
v4: Fix deferred application of interactive mode
v5: s/state/interactive/
v6: Group the mutex with its principle in a substruct
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107111
Fixes: e9af4ea2b9e7 ("drm/i915: Avoid waitboosting on the active request")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180731132629.3381-1-chris@chris-wilson.co.uk
2018-07-31 20:26:29 +07:00
|
|
|
extern void intel_rps_mark_interactive(struct drm_i915_private *i915,
|
|
|
|
bool interactive);
|
2016-11-29 00:37:12 +07:00
|
|
|
extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
|
2014-07-01 16:36:17 +07:00
|
|
|
bool enable);
|
2010-04-07 15:15:53 +07:00
|
|
|
|
2012-07-13 01:01:05 +07:00
|
|
|
int i915_reg_read_ioctl(struct drm_device *dev, void *data,
|
|
|
|
struct drm_file *file);
|
2012-03-29 03:39:37 +07:00
|
|
|
|
2010-08-05 02:26:07 +07:00
|
|
|
/* overlay */
|
2016-05-06 21:40:21 +07:00
|
|
|
extern struct intel_overlay_error_state *
|
|
|
|
intel_overlay_capture_error_state(struct drm_i915_private *dev_priv);
|
2013-05-23 17:55:35 +07:00
|
|
|
extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
|
|
|
|
struct intel_overlay_error_state *error);
|
2010-11-21 20:12:35 +07:00
|
|
|
|
2016-05-06 21:40:21 +07:00
|
|
|
extern struct intel_display_error_state *
|
|
|
|
intel_display_capture_error_state(struct drm_i915_private *dev_priv);
|
2013-05-23 17:55:35 +07:00
|
|
|
extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
|
2010-11-21 20:12:35 +07:00
|
|
|
struct intel_display_error_state *error);
|
2010-08-05 02:26:07 +07:00
|
|
|
|
2014-11-14 09:50:10 +07:00
|
|
|
int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val);
|
2018-01-30 21:29:38 +07:00
|
|
|
int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox,
|
2018-01-30 21:29:39 +07:00
|
|
|
u32 val, int fast_timeout_us,
|
|
|
|
int slow_timeout_ms);
|
2018-01-30 21:29:38 +07:00
|
|
|
/*
 * Convenience wrapper around sandybridge_pcode_write_timeout() that fixes
 * the two timeout arguments: fast_timeout_us = 500 and slow_timeout_ms = 0.
 */
#define sandybridge_pcode_write(dev_priv, mbox, val) \
	sandybridge_pcode_write_timeout((dev_priv), (mbox), (val), 500, 0)
|
2018-01-30 21:29:38 +07:00
|
|
|
|
2016-12-05 23:27:37 +07:00
|
|
|
int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
|
|
|
|
u32 reply_mask, u32 reply, int timeout_base_ms);
|
2013-05-22 19:36:16 +07:00
|
|
|
|
|
|
|
/* intel_sideband.c */
|
2015-01-16 22:12:17 +07:00
|
|
|
u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr);
|
2017-01-26 17:19:19 +07:00
|
|
|
int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val);
|
2013-05-22 19:36:20 +07:00
|
|
|
u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr);
|
2016-02-04 23:55:15 +07:00
|
|
|
u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg);
|
|
|
|
void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val);
|
2013-08-27 19:12:14 +07:00
|
|
|
u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg);
|
|
|
|
void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
|
|
|
|
u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg);
|
|
|
|
void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
|
2013-11-05 02:52:44 +07:00
|
|
|
u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg);
|
|
|
|
void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
|
2013-09-05 19:41:49 +07:00
|
|
|
u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg);
|
|
|
|
void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val);
|
2013-05-22 19:36:16 +07:00
|
|
|
u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
|
|
|
|
enum intel_sbi_destination destination);
|
|
|
|
void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
|
|
|
|
enum intel_sbi_destination destination);
|
2013-12-10 13:44:55 +07:00
|
|
|
u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg);
|
|
|
|
void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
|
2013-04-18 05:54:58 +07:00
|
|
|
|
2016-04-27 19:44:17 +07:00
|
|
|
/* intel_dpio_phy.c */
|
2016-12-02 15:23:51 +07:00
|
|
|
void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port,
|
2016-10-19 14:59:00 +07:00
|
|
|
enum dpio_phy *phy, enum dpio_channel *ch);
|
2016-10-06 23:22:19 +07:00
|
|
|
void bxt_ddi_phy_set_signal_level(struct drm_i915_private *dev_priv,
|
|
|
|
enum port port, u32 margin, u32 scale,
|
|
|
|
u32 enable, u32 deemphasis);
|
2016-10-06 23:22:17 +07:00
|
|
|
void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy);
|
|
|
|
void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy);
|
|
|
|
bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv,
|
|
|
|
enum dpio_phy phy);
|
|
|
|
bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv,
|
|
|
|
enum dpio_phy phy);
|
2017-10-27 20:43:48 +07:00
|
|
|
uint8_t bxt_ddi_phy_calc_lane_lat_optim_mask(uint8_t lane_count);
|
2016-10-06 23:22:17 +07:00
|
|
|
void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder,
|
|
|
|
uint8_t lane_lat_optim_mask);
|
|
|
|
uint8_t bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder);
|
|
|
|
|
2016-04-27 19:44:17 +07:00
|
|
|
void chv_set_phy_signal_level(struct intel_encoder *encoder,
|
|
|
|
u32 deemph_reg_value, u32 margin_reg_value,
|
|
|
|
bool uniq_trans_scale);
|
2016-04-27 19:44:18 +07:00
|
|
|
void chv_data_lane_soft_reset(struct intel_encoder *encoder,
|
2017-11-01 03:51:18 +07:00
|
|
|
const struct intel_crtc_state *crtc_state,
|
2016-04-27 19:44:18 +07:00
|
|
|
bool reset);
|
2017-11-01 03:51:18 +07:00
|
|
|
void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
|
|
|
|
const struct intel_crtc_state *crtc_state);
|
|
|
|
void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
|
|
|
|
const struct intel_crtc_state *crtc_state);
|
2016-04-27 19:44:20 +07:00
|
|
|
void chv_phy_release_cl2_override(struct intel_encoder *encoder);
|
2017-11-01 03:51:18 +07:00
|
|
|
void chv_phy_post_pll_disable(struct intel_encoder *encoder,
|
|
|
|
const struct intel_crtc_state *old_crtc_state);
|
2016-04-27 19:44:17 +07:00
|
|
|
|
2016-04-27 19:44:22 +07:00
|
|
|
void vlv_set_phy_signal_level(struct intel_encoder *encoder,
|
|
|
|
u32 demph_reg_value, u32 preemph_reg_value,
|
|
|
|
u32 uniqtranscale_reg_value, u32 tx3_demph);
|
2017-11-01 03:51:18 +07:00
|
|
|
void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
|
|
|
|
const struct intel_crtc_state *crtc_state);
|
|
|
|
void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
|
|
|
|
const struct intel_crtc_state *crtc_state);
|
|
|
|
void vlv_phy_reset_lanes(struct intel_encoder *encoder,
|
|
|
|
const struct intel_crtc_state *old_crtc_state);
|
2016-04-27 19:44:22 +07:00
|
|
|
|
2015-01-24 02:04:25 +07:00
|
|
|
int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
|
|
|
|
int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
|
2017-11-22 01:18:51 +07:00
|
|
|
u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
|
2017-03-15 22:43:00 +07:00
|
|
|
const i915_reg_t reg);
|
2013-11-23 16:25:42 +07:00
|
|
|
|
2017-11-22 01:18:44 +07:00
|
|
|
u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1);
|
|
|
|
|
2017-11-22 01:18:51 +07:00
|
|
|
static inline u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
|
|
|
|
const i915_reg_t reg)
|
|
|
|
{
|
|
|
|
return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000);
|
|
|
|
}
|
|
|
|
|
2013-10-05 11:22:51 +07:00
|
|
|
/*
 * 8-bit MMIO accessors. They expand to a call through
 * dev_priv->uncore.funcs, so a variable named dev_priv must be in scope at
 * the point of use. The final argument is passed as true here (the _NOTRACE
 * accessor variants pass false instead).
 */
#define I915_READ8(reg) \
	dev_priv->uncore.funcs.mmio_readb(dev_priv, (reg), true)
#define I915_WRITE8(reg, val) \
	dev_priv->uncore.funcs.mmio_writeb(dev_priv, (reg), (val), true)
|
|
|
|
|
|
|
|
/*
 * 16-bit MMIO accessors; a variable named dev_priv must be in scope.
 * The plain variants pass true as the last argument of the uncore hook,
 * the _NOTRACE variants pass false.
 */
#define I915_READ16(reg) \
	dev_priv->uncore.funcs.mmio_readw(dev_priv, (reg), true)
#define I915_WRITE16(reg, val) \
	dev_priv->uncore.funcs.mmio_writew(dev_priv, (reg), (val), true)
#define I915_READ16_NOTRACE(reg) \
	dev_priv->uncore.funcs.mmio_readw(dev_priv, (reg), false)
#define I915_WRITE16_NOTRACE(reg, val) \
	dev_priv->uncore.funcs.mmio_writew(dev_priv, (reg), (val), false)
|
|
|
|
|
|
|
|
/*
 * 32-bit MMIO accessors; a variable named dev_priv must be in scope.
 * The plain variants pass true as the last argument of the uncore hook,
 * the _NOTRACE variants pass false.
 */
#define I915_READ(reg) \
	dev_priv->uncore.funcs.mmio_readl(dev_priv, (reg), true)
#define I915_WRITE(reg, val) \
	dev_priv->uncore.funcs.mmio_writel(dev_priv, (reg), (val), true)
#define I915_READ_NOTRACE(reg) \
	dev_priv->uncore.funcs.mmio_readl(dev_priv, (reg), false)
#define I915_WRITE_NOTRACE(reg, val) \
	dev_priv->uncore.funcs.mmio_writel(dev_priv, (reg), (val), false)
|
|
|
|
|
2014-03-21 20:16:43 +07:00
|
|
|
/* Be very careful with read/write 64-bit values. On 32-bit machines, they
|
|
|
|
* will be implemented using 2 32-bit writes in an arbitrary order with
|
|
|
|
* an arbitrary delay between them. This can cause the hardware to
|
|
|
|
* act upon the intermediate value, possibly leading to corruption and
|
2016-09-06 21:45:38 +07:00
|
|
|
* machine death. For this reason we do not support I915_WRITE64, or
|
|
|
|
* dev_priv->uncore.funcs.mmio_writeq.
|
|
|
|
*
|
|
|
|
* When reading a 64-bit value as two 32-bit values, the delay may cause
|
|
|
|
* the two reads to mismatch, e.g. a timestamp overflowing. Also note that
|
|
|
|
* occasionally a 64-bit register does not actually support a full readq
|
|
|
|
* and must be read using two 32-bit reads.
|
|
|
|
*
|
|
|
|
* You have been warned.
|
2014-03-21 20:16:43 +07:00
|
|
|
*/
|
2013-10-05 11:22:51 +07:00
|
|
|
#define I915_READ64(reg) \
	dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true)
|
2010-11-09 16:17:32 +07:00
|
|
|
|
2014-03-21 19:41:53 +07:00
|
|
|
/*
 * Read a 64-bit quantity that is exposed as two 32-bit registers.
 *
 * The upper half is read, then the lower half, then the upper half again;
 * the sequence is repeated while the two upper reads disagree, for at most
 * three passes of the loop. The result is (upper << 32) | lower from the
 * final pass, as a u64.
 *
 * Expands to I915_READ(), so a variable named dev_priv must be in scope.
 * The temporaries carry a "__" suffix so that they cannot capture
 * identifiers appearing in @lower_reg / @upper_reg (e.g. a caller whose own
 * variables happen to be called "lower" and "upper").
 */
#define I915_READ64_2x32(lower_reg, upper_reg) ({			\
	u32 upper__, lower__, old_upper__, loop__ = 0;			\
	upper__ = I915_READ(upper_reg);					\
	do {								\
		old_upper__ = upper__;					\
		lower__ = I915_READ(lower_reg);				\
		upper__ = I915_READ(upper_reg);				\
	} while (upper__ != old_upper__ && loop__++ < 2);		\
	(u64)upper__ << 32 | lower__; })
|
2014-03-21 19:41:53 +07:00
|
|
|
|
2010-11-09 16:17:32 +07:00
|
|
|
/*
 * Posting reads: perform a _NOTRACE register read and discard the value.
 */
#define POSTING_READ(reg) \
	((void)I915_READ_NOTRACE(reg))
#define POSTING_READ16(reg) \
	((void)I915_READ16_NOTRACE(reg))
|
|
|
|
|
2015-10-22 19:34:56 +07:00
|
|
|
/*
 * Generator for the raw MMIO read helpers.
 *
 * __raw_read(x, s) defines __raw_i915_read<x>(), which returns the
 * uint<x>_t obtained by calling read<s>() on dev_priv->regs plus the
 * offset of @reg (via i915_mmio_reg_offset()).
 */
#define __raw_read(x, s) \
static inline uint##x##_t \
__raw_i915_read##x(const struct drm_i915_private *dev_priv, i915_reg_t reg) \
{ \
	return read##s(dev_priv->regs + i915_mmio_reg_offset(reg)); \
}
|
|
|
|
|
|
|
|
/*
 * Generator for the raw MMIO write helpers.
 *
 * __raw_write(x, s) defines __raw_i915_write<x>(), which stores the
 * uint<x>_t @val by calling write<s>() on dev_priv->regs plus the offset
 * of @reg (via i915_mmio_reg_offset()).
 */
#define __raw_write(x, s) \
static inline void \
__raw_i915_write##x(const struct drm_i915_private *dev_priv, \
		    i915_reg_t reg, uint##x##_t val) \
{ \
	write##s(val, dev_priv->regs + i915_mmio_reg_offset(reg)); \
}
|
|
|
|
/* Emit __raw_i915_read{8,16,32,64}() on top of read{b,w,l,q}(). */
__raw_read(8, b)
__raw_read(16, w)
__raw_read(32, l)
__raw_read(64, q)

/* Emit __raw_i915_write{8,16,32,64}() on top of write{b,w,l,q}(). */
__raw_write(8, b)
__raw_write(16, w)
__raw_write(32, l)
__raw_write(64, q)

/* The generator macros are only needed for the instantiations above. */
#undef __raw_read
#undef __raw_write
|
|
|
|
|
2015-04-07 22:21:02 +07:00
|
|
|
/* These are untraced mmio-accessors that are only valid to be used inside
|
2016-10-25 19:48:02 +07:00
|
|
|
* critical sections, such as inside IRQ handlers, where forcewake is explicitly
|
2015-04-07 22:21:02 +07:00
|
|
|
* controlled.
|
2016-10-25 19:48:02 +07:00
|
|
|
*
|
2015-04-07 22:21:02 +07:00
|
|
|
* Think twice, and think again, before using these.
|
2016-10-25 19:48:02 +07:00
|
|
|
*
|
|
|
|
* As an example, these accessors can possibly be used between:
|
|
|
|
*
|
|
|
|
* spin_lock_irq(&dev_priv->uncore.lock);
|
|
|
|
* intel_uncore_forcewake_get__locked();
|
|
|
|
*
|
|
|
|
* and
|
|
|
|
*
|
|
|
|
* intel_uncore_forcewake_put__locked();
|
|
|
|
* spin_unlock_irq(&dev_priv->uncore.lock);
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* Note: some registers may not need forcewake held, so
|
|
|
|
* intel_uncore_forcewake_{get,put} can be omitted, see
|
|
|
|
* intel_uncore_forcewake_for_reg().
|
|
|
|
*
|
|
|
|
* Certain architectures will die if the same cacheline is concurrently accessed
|
|
|
|
* by different clients (e.g. on Ivybridge). Access to registers should
|
|
|
|
* therefore generally be serialised, by either the dev_priv->uncore.lock or
|
|
|
|
* a more localised lock guarding all access to that bank of registers.
|
2015-04-07 22:21:02 +07:00
|
|
|
*/
|
2015-10-22 19:34:56 +07:00
|
|
|
#define I915_READ_FW(reg__) \
	__raw_i915_read32(dev_priv, (reg__))
#define I915_WRITE_FW(reg__, val__) \
	__raw_i915_write32(dev_priv, (reg__), (val__))
#define I915_WRITE64_FW(reg__, val__) \
	__raw_i915_write64(dev_priv, (reg__), (val__))
#define POSTING_READ_FW(reg__) \
	((void)I915_READ_FW(reg__))
|
|
|
|
|
2013-01-17 21:31:29 +07:00
|
|
|
/* Values of the "Broadcast RGB" property */
#define INTEL_BROADCAST_RGB_AUTO	0
#define INTEL_BROADCAST_RGB_FULL	1
#define INTEL_BROADCAST_RGB_LIMITED	2
|
2010-11-08 16:09:41 +07:00
|
|
|
|
2016-10-14 16:13:44 +07:00
|
|
|
static inline i915_reg_t i915_vgacntrl_reg(struct drm_i915_private *dev_priv)
|
2013-01-26 02:44:46 +07:00
|
|
|
{
|
2016-10-14 16:13:44 +07:00
|
|
|
if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
|
2013-01-26 02:44:46 +07:00
|
|
|
return VLV_VGACNTRL;
|
2016-10-14 16:13:44 +07:00
|
|
|
else if (INTEL_GEN(dev_priv) >= 5)
|
2014-07-21 16:53:40 +07:00
|
|
|
return CPU_VGACNTRL;
|
2013-01-26 02:44:46 +07:00
|
|
|
else
|
|
|
|
return VGACNTRL;
|
|
|
|
}
|
|
|
|
|
2013-05-22 00:03:17 +07:00
|
|
|
static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m)
|
|
|
|
{
|
|
|
|
unsigned long j = msecs_to_jiffies(m);
|
|
|
|
|
|
|
|
return min_t(unsigned long, MAX_JIFFY_OFFSET, j + 1);
|
|
|
|
}
|
|
|
|
|
2014-12-04 17:12:54 +07:00
|
|
|
static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
|
|
|
|
{
|
2017-08-11 17:57:31 +07:00
|
|
|
/* nsecs_to_jiffies64() does not guard against overflow */
|
|
|
|
if (NSEC_PER_SEC % HZ &&
|
|
|
|
div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
|
|
|
|
return MAX_JIFFY_OFFSET;
|
|
|
|
|
2014-12-04 17:12:54 +07:00
|
|
|
return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
|
|
|
|
}
|
|
|
|
|
2013-12-19 23:29:40 +07:00
|
|
|
/*
|
|
|
|
* If you need to wait X milliseconds between events A and B, but event B
|
|
|
|
* doesn't happen exactly after event A, you record the timestamp (jiffies) of
|
|
|
|
* when event A happened, then just before event B you call this function and
|
|
|
|
* pass the timestamp as the first argument, and X as the second argument.
|
|
|
|
*/
|
|
|
|
static inline void
|
|
|
|
wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
|
|
|
|
{
|
2014-01-29 18:25:40 +07:00
|
|
|
unsigned long target_jiffies, tmp_jiffies, remaining_jiffies;
|
2013-12-19 23:29:40 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Don't re-read the value of "jiffies" every time since it may change
|
|
|
|
* behind our back and break the math.
|
|
|
|
*/
|
|
|
|
tmp_jiffies = jiffies;
|
|
|
|
target_jiffies = timestamp_jiffies +
|
|
|
|
msecs_to_jiffies_timeout(to_wait_ms);
|
|
|
|
|
|
|
|
if (time_after(target_jiffies, tmp_jiffies)) {
|
2014-01-29 18:25:40 +07:00
|
|
|
remaining_jiffies = target_jiffies - tmp_jiffies;
|
|
|
|
while (remaining_jiffies)
|
|
|
|
remaining_jiffies =
|
|
|
|
schedule_timeout_uninterruptible(remaining_jiffies);
|
2013-12-19 23:29:40 +07:00
|
|
|
}
|
|
|
|
}
|
2016-09-09 20:11:51 +07:00
|
|
|
|
|
|
|
static inline bool
|
2018-02-21 16:56:36 +07:00
|
|
|
__i915_request_irq_complete(const struct i915_request *rq)
|
drm/i915: Slaughter the thundering i915_wait_request herd
One particularly stressful scenario consists of many independent tasks
all competing for GPU time and waiting upon the results (e.g. realtime
transcoding of many, many streams). One bottleneck in particular is that
each client waits on its own results, but every client is woken up after
every batchbuffer - hence the thunder of hooves as then every client must
do its heavyweight dance to read a coherent seqno to see if it is the
lucky one.
Ideally, we only want one client to wake up after the interrupt and
check its request for completion. Since the requests must retire in
order, we can select the first client on the oldest request to be woken.
Once that client has completed his wait, we can then wake up the
next client and so on. However, all clients then incur latency as every
process in the chain may be delayed for scheduling - this may also then
cause some priority inversion. To reduce the latency, when a client
is added or removed from the list, we scan the tree for completed
seqno and wake up all the completed waiters in parallel.
Using igt/benchmarks/gem_latency, we can demonstrate this effect. The
benchmark measures the number of GPU cycles between completion of a
batch and the client waking up from a call to wait-ioctl. With many
concurrent waiters, with each on a different request, we observe that
the wakeup latency before the patch scales nearly linearly with the
number of waiters (before external factors kick in making the scaling much
worse). After applying the patch, we can see that only the single waiter
for the request is being woken up, providing a constant wakeup latency
for every operation. However, the situation is not quite as rosy for
many waiters on the same request, though to the best of my knowledge this
is much less likely in practice. Here, we can observe that the
concurrent waiters incur extra latency from being woken up by the
solitary bottom-half, rather than directly by the interrupt. This
appears to be scheduler induced (having discounted adverse effects from
having a rbtree walk/erase in the wakeup path), each additional
wake_up_process() costs approximately 1us on big core. Another effect of
performing the secondary wakeups from the first bottom-half is the
incurred delay this imposes on high priority threads - rather than
immediately returning to userspace and leaving the interrupt handler to
wake the others.
To offset the delay incurred with additional waiters on a request, we
could use a hybrid scheme that did a quick read in the interrupt handler
and dequeued all the completed waiters (incurring the overhead in the
interrupt handler, not the best plan either as we then incur GPU
submission latency) but we would still have to wake up the bottom-half
every time to do the heavyweight slow read. Or we could only kick the
waiters on the seqno with the same priority as the current task (i.e. in
the realtime waiter scenario, only it is woken up immediately by the
interrupt and simply queues the next waiter before returning to userspace,
minimising its delay at the expense of the chain, and also reducing
contention on its scheduler runqueue). This is effective at avoiding long
pauses in the interrupt handler and at avoiding the extra latency in
realtime/high-priority waiters.
v2: Convert from a kworker per engine into a dedicated kthread for the
bottom-half.
v3: Rename request members and tweak comments.
v4: Use a per-engine spinlock in the breadcrumbs bottom-half.
v5: Fix race in locklessly checking waiter status and kicking the task on
adding a new waiter.
v6: Fix deciding when to force the timer to hide missing interrupts.
v7: Move the bottom-half from the kthread to the first client process.
v8: Reword a few comments
v9: Break the busy loop when the interrupt is unmasked or has fired.
v10: Comments, unnecessary churn, better debugging from Tvrtko
v11: Wake all completed waiters on removing the current bottom-half to
reduce the latency of waking up a herd of clients all waiting on the
same request.
v12: Rearrange missed-interrupt fault injection so that it works with
igt/drv_missed_irq_hang
v13: Rename intel_breadcrumb and friends to intel_wait in preparation
for signal handling.
v14: RCU commentary, assert_spin_locked
v15: Hide BUG_ON behind the compiler; report on gem_latency findings.
v16: Sort seqno-groups by priority so that first-waiter has the highest
task priority (and so avoid priority inversion).
v17: Add waiters to post-mortem GPU hang state.
v18: Return early for a completed wait after acquiring the spinlock.
Avoids adding ourselves to the tree if the request is already complete, and
skips the awkward question of why we don't do completion wakeups for
waits earlier than or equal to ourselves.
v19: Prepare for init_breadcrumbs to fail. Later patches may want to
allocate during init, so be prepared to propagate back the error code.
Testcase: igt/gem_concurrent_blit
Testcase: igt/benchmarks/gem_latency
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Dave Gordon <david.s.gordon@intel.com>
Cc: "Goel, Akash" <akash.goel@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> #v18
Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-6-git-send-email-chris@chris-wilson.co.uk
2016-07-01 23:23:15 +07:00
|
|
|
{
|
2018-02-21 16:56:36 +07:00
|
|
|
struct intel_engine_cs *engine = rq->engine;
|
2017-02-23 14:44:14 +07:00
|
|
|
u32 seqno;
|
2016-07-01 23:23:16 +07:00
|
|
|
|
2017-02-23 14:44:07 +07:00
|
|
|
/* Note that the engine may have wrapped around the seqno, and
|
|
|
|
* so our request->global_seqno will be ahead of the hardware,
|
|
|
|
* even though it completed the request before wrapping. We catch
|
|
|
|
* this by kicking all the waiters before resetting the seqno
|
|
|
|
* in hardware, and also signal the fence.
|
|
|
|
*/
|
2018-02-21 16:56:36 +07:00
|
|
|
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
|
2017-02-23 14:44:07 +07:00
|
|
|
return true;
|
|
|
|
|
2017-02-23 14:44:14 +07:00
|
|
|
/* The request was dequeued before we were awoken. We check after
|
|
|
|
* inspecting the hw to confirm that this was the same request
|
|
|
|
* that generated the HWS update. The memory barriers within
|
|
|
|
* the request execution are sufficient to ensure that a check
|
|
|
|
* after reading the value from hw matches this request.
|
|
|
|
*/
|
2018-02-21 16:56:36 +07:00
|
|
|
seqno = i915_request_global_seqno(rq);
|
2017-02-23 14:44:14 +07:00
|
|
|
if (!seqno)
|
|
|
|
return false;
|
|
|
|
|
2016-07-01 23:23:22 +07:00
|
|
|
/* Before we do the heavier coherent read of the seqno,
|
|
|
|
* check the value (hopefully) in the CPU cacheline.
|
|
|
|
*/
|
2018-02-21 16:56:36 +07:00
|
|
|
if (__i915_request_completed(rq, seqno))
|
2016-07-01 23:23:22 +07:00
|
|
|
return true;
|
|
|
|
|
drm/i915: Slaughter the thundering i915_wait_request herd
One particularly stressful scenario consists of many independent tasks
all competing for GPU time and waiting upon the results (e.g. realtime
transcoding of many, many streams). One bottleneck in particular is that
each client waits on its own results, but every client is woken up after
every batchbuffer - hence the thunder of hooves as then every client must
do its heavyweight dance to read a coherent seqno to see if it is the
lucky one.
Ideally, we only want one client to wake up after the interrupt and
check its request for completion. Since the requests must retire in
order, we can select the first client on the oldest request to be woken.
Once that client has completed his wait, we can then wake up the
next client and so on. However, all clients then incur latency as every
process in the chain may be delayed for scheduling - this may also then
cause some priority inversion. To reduce the latency, when a client
is added or removed from the list, we scan the tree for completed
seqno and wake up all the completed waiters in parallel.
Using igt/benchmarks/gem_latency, we can demonstrate this effect. The
benchmark measures the number of GPU cycles between completion of a
batch and the client waking up from a call to wait-ioctl. With many
concurrent waiters, with each on a different request, we observe that
the wakeup latency before the patch scales nearly linearly with the
number of waiters (before external factors kick in making the scaling much
worse). After applying the patch, we can see that only the single waiter
for the request is being woken up, providing a constant wakeup latency
for every operation. However, the situation is not quite as rosy for
many waiters on the same request, though to the best of my knowledge this
is much less likely in practice. Here, we can observe that the
concurrent waiters incur extra latency from being woken up by the
solitary bottom-half, rather than directly by the interrupt. This
appears to be scheduler induced (having discounted adverse effects from
having a rbtree walk/erase in the wakeup path), each additional
wake_up_process() costs approximately 1us on big core. Another effect of
performing the secondary wakeups from the first bottom-half is the
incurred delay this imposes on high priority threads - rather than
immediately returning to userspace and leaving the interrupt handler to
wake the others.
To offset the delay incurred with additional waiters on a request, we
could use a hybrid scheme that did a quick read in the interrupt handler
and dequeued all the completed waiters (incurring the overhead in the
interrupt handler, not the best plan either as we then incur GPU
submission latency) but we would still have to wake up the bottom-half
every time to do the heavyweight slow read. Or we could only kick the
waiters on the seqno with the same priority as the current task (i.e. in
the realtime waiter scenario, only it is woken up immediately by the
interrupt and simply queues the next waiter before returning to userspace,
minimising its delay at the expense of the chain, and also reducing
contention on its scheduler runqueue). This is effective at avoiding long
pauses in the interrupt handler and at avoiding the extra latency in
realtime/high-priority waiters.
v2: Convert from a kworker per engine into a dedicated kthread for the
bottom-half.
v3: Rename request members and tweak comments.
v4: Use a per-engine spinlock in the breadcrumbs bottom-half.
v5: Fix race in locklessly checking waiter status and kicking the task on
adding a new waiter.
v6: Fix deciding when to force the timer to hide missing interrupts.
v7: Move the bottom-half from the kthread to the first client process.
v8: Reword a few comments
v9: Break the busy loop when the interrupt is unmasked or has fired.
v10: Comments, unnecessary churn, better debugging from Tvrtko
v11: Wake all completed waiters on removing the current bottom-half to
reduce the latency of waking up a herd of clients all waiting on the
same request.
v12: Rearrange missed-interrupt fault injection so that it works with
igt/drv_missed_irq_hang
v13: Rename intel_breadcrumb and friends to intel_wait in preparation
for signal handling.
v14: RCU commentary, assert_spin_locked
v15: Hide BUG_ON behind the compiler; report on gem_latency findings.
v16: Sort seqno-groups by priority so that first-waiter has the highest
task priority (and so avoid priority inversion).
v17: Add waiters to post-mortem GPU hang state.
v18: Return early for a completed wait after acquiring the spinlock.
Avoids adding ourselves to the tree if the request is already complete, and
skips the awkward question of why we don't do completion wakeups for
waits earlier than or equal to ourselves.
v19: Prepare for init_breadcrumbs to fail. Later patches may want to
allocate during init, so be prepared to propagate back the error code.
Testcase: igt/gem_concurrent_blit
Testcase: igt/benchmarks/gem_latency
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Dave Gordon <david.s.gordon@intel.com>
Cc: "Goel, Akash" <akash.goel@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> #v18
Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-6-git-send-email-chris@chris-wilson.co.uk
2016-07-01 23:23:15 +07:00
|
|
|
/* Ensure our read of the seqno is coherent so that we
|
|
|
|
* do not "miss an interrupt" (i.e. if this is the last
|
|
|
|
* request and the seqno write from the GPU is not visible
|
|
|
|
* by the time the interrupt fires, we will see that the
|
|
|
|
* request is incomplete and go back to sleep awaiting
|
|
|
|
* another interrupt that will never come.)
|
|
|
|
*
|
|
|
|
* Strictly, we only need to do this once after an interrupt,
|
|
|
|
* but it is easier and safer to do it every time the waiter
|
|
|
|
* is woken.
|
|
|
|
*/
|
2016-07-01 23:23:23 +07:00
|
|
|
if (engine->irq_seqno_barrier &&
|
2017-01-24 22:18:05 +07:00
|
|
|
test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) {
|
2017-02-28 03:58:48 +07:00
|
|
|
struct intel_breadcrumbs *b = &engine->breadcrumbs;
|
2016-07-06 18:39:01 +07:00
|
|
|
|
2016-07-01 23:23:23 +07:00
|
|
|
/* The ordering of irq_posted versus applying the barrier
|
|
|
|
* is crucial. The clearing of the current irq_posted must
|
|
|
|
* be visible before we perform the barrier operation,
|
|
|
|
* such that if a subsequent interrupt arrives, irq_posted
|
|
|
|
* is reasserted and our task rewoken (which causes us to
|
|
|
|
* do another __i915_request_irq_complete() immediately
|
|
|
|
* and reapply the barrier). Conversely, if the clear
|
|
|
|
* occurs after the barrier, then an interrupt that arrived
|
|
|
|
* whilst we waited on the barrier would not trigger a
|
|
|
|
* barrier on the next pass, and the read may not see the
|
|
|
|
* seqno update.
|
|
|
|
*/
|
2016-07-01 23:23:16 +07:00
|
|
|
engine->irq_seqno_barrier(engine);
|
2016-07-06 18:39:01 +07:00
|
|
|
|
|
|
|
/* If we consume the irq, but we are no longer the bottom-half,
|
|
|
|
* the real bottom-half may not have serialised their own
|
|
|
|
* seqno check with the irq-barrier (i.e. may have inspected
|
|
|
|
* the seqno before we believe it coherent since they see
|
|
|
|
* irq_posted == false but we are still running).
|
|
|
|
*/
|
2017-03-06 22:03:19 +07:00
|
|
|
spin_lock_irq(&b->irq_lock);
|
2017-03-04 02:08:24 +07:00
|
|
|
if (b->irq_wait && b->irq_wait->tsk != current)
|
2016-07-06 18:39:01 +07:00
|
|
|
/* Note that if the bottom-half is changed as we
|
|
|
|
* are sending the wake-up, the new bottom-half will
|
|
|
|
* be woken by whomever made the change. We only have
|
|
|
|
* to worry about when we steal the irq-posted for
|
|
|
|
* ourself.
|
|
|
|
*/
|
2017-03-04 02:08:24 +07:00
|
|
|
wake_up_process(b->irq_wait->tsk);
|
2017-03-06 22:03:19 +07:00
|
|
|
spin_unlock_irq(&b->irq_lock);
|
2016-07-06 18:39:01 +07:00
|
|
|
|
2018-02-21 16:56:36 +07:00
|
|
|
if (__i915_request_completed(rq, seqno))
|
2016-07-01 23:23:22 +07:00
|
|
|
return true;
|
|
|
|
}
|
drm/i915: Slaughter the thundering i915_wait_request herd
One particularly stressful scenario consists of many independent tasks
all competing for GPU time and waiting upon the results (e.g. realtime
transcoding of many, many streams). One bottleneck in particular is that
each client waits on its own results, but every client is woken up after
every batchbuffer - hence the thunder of hooves as then every client must
do its heavyweight dance to read a coherent seqno to see if it is the
lucky one.
Ideally, we only want one client to wake up after the interrupt and
check its request for completion. Since the requests must retire in
order, we can select the first client on the oldest request to be woken.
Once that client has completed his wait, we can then wake up the
next client and so on. However, all clients then incur latency as every
process in the chain may be delayed for scheduling - this may also then
cause some priority inversion. To reduce the latency, when a client
is added or removed from the list, we scan the tree for completed
seqno and wake up all the completed waiters in parallel.
Using igt/benchmarks/gem_latency, we can demonstrate this effect. The
benchmark measures the number of GPU cycles between completion of a
batch and the client waking up from a call to wait-ioctl. With many
concurrent waiters, with each on a different request, we observe that
the wakeup latency before the patch scales nearly linearly with the
number of waiters (before external factors kick in making the scaling much
worse). After applying the patch, we can see that only the single waiter
for the request is being woken up, providing a constant wakeup latency
for every operation. However, the situation is not quite as rosy for
many waiters on the same request, though to the best of my knowledge this
is much less likely in practice. Here, we can observe that the
concurrent waiters incur extra latency from being woken up by the
solitary bottom-half, rather than directly by the interrupt. This
appears to be scheduler induced (having discounted adverse effects from
having a rbtree walk/erase in the wakeup path), each additional
wake_up_process() costs approximately 1us on big core. Another effect of
performing the secondary wakeups from the first bottom-half is the
incurred delay this imposes on high priority threads - rather than
immediately returning to userspace and leaving the interrupt handler to
wake the others.
To offset the delay incurred with additional waiters on a request, we
could use a hybrid scheme that did a quick read in the interrupt handler
and dequeued all the completed waiters (incurring the overhead in the
interrupt handler, not the best plan either as we then incur GPU
submission latency) but we would still have to wake up the bottom-half
every time to do the heavyweight slow read. Or we could only kick the
waiters on the seqno with the same priority as the current task (i.e. in
the realtime waiter scenario, only it is woken up immediately by the
interrupt and simply queues the next waiter before returning to userspace,
minimising its delay at the expense of the chain, and also reducing
contention on its scheduler runqueue). This is effective at avoiding long
pauses in the interrupt handler and at avoiding the extra latency in
realtime/high-priority waiters.
v2: Convert from a kworker per engine into a dedicated kthread for the
bottom-half.
v3: Rename request members and tweak comments.
v4: Use a per-engine spinlock in the breadcrumbs bottom-half.
v5: Fix race in locklessly checking waiter status and kicking the task on
adding a new waiter.
v6: Fix deciding when to force the timer to hide missing interrupts.
v7: Move the bottom-half from the kthread to the first client process.
v8: Reword a few comments
v9: Break the busy loop when the interrupt is unmasked or has fired.
v10: Comments, unnecessary churn, better debugging from Tvrtko
v11: Wake all completed waiters on removing the current bottom-half to
reduce the latency of waking up a herd of clients all waiting on the
same request.
v12: Rearrange missed-interrupt fault injection so that it works with
igt/drv_missed_irq_hang
v13: Rename intel_breadcrumb and friends to intel_wait in preparation
for signal handling.
v14: RCU commentary, assert_spin_locked
v15: Hide BUG_ON behind the compiler; report on gem_latency findings.
v16: Sort seqno-groups by priority so that first-waiter has the highest
task priority (and so avoid priority inversion).
v17: Add waiters to post-mortem GPU hang state.
v18: Return early for a completed wait after acquiring the spinlock.
Avoids adding ourselves to the tree if the request is already complete, and
skips the awkward question of why we don't do completion wakeups for
waits earlier than or equal to ourselves.
v19: Prepare for init_breadcrumbs to fail. Later patches may want to
allocate during init, so be prepared to propagate back the error code.
Testcase: igt/gem_concurrent_blit
Testcase: igt/benchmarks/gem_latency
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Dave Gordon <david.s.gordon@intel.com>
Cc: "Goel, Akash" <akash.goel@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> #v18
Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-6-git-send-email-chris@chris-wilson.co.uk
2016-07-01 23:23:15 +07:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-08-12 18:39:59 +07:00
|
|
|
void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
|
|
|
|
bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
|
|
|
|
|
2017-01-06 22:20:09 +07:00
|
|
|
/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment,
|
|
|
|
* as well as SSE4.1 support. i915_memcpy_from_wc() will report if it cannot
|
|
|
|
* perform the operation. To check beforehand, pass in the parameters to
|
|
|
|
* i915_can_memcpy_from_wc() - since we only care about the low 4 bits,
|
|
|
|
* you only need to pass in the minor offsets, page-aligned pointers are
|
|
|
|
* always valid.
|
|
|
|
*
|
|
|
|
* For just checking for SSE4.1, in the foreknowledge that the future use
|
|
|
|
* will be correctly aligned, just use i915_has_memcpy_from_wc().
|
|
|
|
*/
|
|
|
|
/* Probe form: ORs dst, src and len together into a single value and hands it
 * to i915_memcpy_from_wc() as the dst argument, with src == NULL and len == 0.
 * No bytes are requested (len is 0); the result is whatever
 * i915_memcpy_from_wc() returns for that call.
 * NOTE(review): how i915_memcpy_from_wc() interprets a NULL src / zero len is
 * defined in its implementation, which is not visible here - confirm there.
 */
#define i915_can_memcpy_from_wc(dst, src, len) \
|
|
|
|
i915_memcpy_from_wc((void *)((unsigned long)(dst) | (unsigned long)(src) | (len)), NULL, 0)
|
|
|
|
|
|
|
|
/* Probe form with no alignment information: calls i915_memcpy_from_wc() with
 * NULL pointers and a zero length and yields its return value.
 */
#define i915_has_memcpy_from_wc() \
|
|
|
|
i915_memcpy_from_wc(NULL, NULL, 0)
|
|
|
|
|
2016-08-19 22:54:28 +07:00
|
|
|
/* i915_mm.c */
|
|
|
|
int remap_io_mapping(struct vm_area_struct *vma,
|
|
|
|
unsigned long addr, unsigned long pfn, unsigned long size,
|
|
|
|
struct io_mapping *iomap);
|
|
|
|
|
2017-09-13 15:56:05 +07:00
|
|
|
/* Select the HWS CSB write-index constant appropriate for this device.
 *
 * @i915: device private data; only consulted through INTEL_GEN().
 *
 * Returns CNL_HWS_CSB_WRITE_INDEX when INTEL_GEN(i915) >= 10, otherwise
 * I915_HWS_CSB_WRITE_INDEX.
 *
 * NOTE(review): presumably this is the index within the hardware status page
 * at which the context-status-buffer write pointer is reported - confirm
 * against the definitions of the two constants.
 */
static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
|
|
|
|
{
|
|
|
|
if (INTEL_GEN(i915) >= 10)
|
|
|
|
return CNL_HWS_CSB_WRITE_INDEX;
|
|
|
|
else
|
|
|
|
return I915_HWS_CSB_WRITE_INDEX;
|
|
|
|
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#endif
|