linux_dsm_epyc7002/drivers/gpu/drm/i915/i915_debugfs.c

5616 lines
150 KiB
C
Raw Normal View History

/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Keith Packard <keithp@keithp.com>
*
*/
#include <linux/seq_file.h>
#include <linux/circ_buf.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/list_sort.h>
#include <asm/msr-index.h>
#include <drm/drmP.h>
#include "intel_drv.h"
#include "intel_ringbuffer.h"
#include <drm/i915_drm.h>
#include "i915_drv.h"
enum {
ACTIVE_LIST,
INACTIVE_LIST,
PINNED_LIST,
};
/* As the drm_debugfs_init() routines are called before dev->dev_private is
* allocated we need to hook into the minor for release. */
static int
drm_add_fake_info_node(struct drm_minor *minor,
struct dentry *ent,
const void *key)
{
struct drm_info_node *node;
node = kmalloc(sizeof(*node), GFP_KERNEL);
if (node == NULL) {
debugfs_remove(ent);
return -ENOMEM;
}
node->minor = minor;
node->dent = ent;
node->info_ent = (void *) key;
mutex_lock(&minor->debugfs_lock);
list_add(&node->list, &minor->debugfs_list);
mutex_unlock(&minor->debugfs_lock);
return 0;
}
static int i915_capabilities(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
const struct intel_device_info *info = INTEL_INFO(dev);
seq_printf(m, "gen: %d\n", info->gen);
seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev));
#define PRINT_FLAG(x) seq_printf(m, #x ": %s\n", yesno(info->x))
#define SEP_SEMICOLON ;
DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG, SEP_SEMICOLON);
#undef PRINT_FLAG
#undef SEP_SEMICOLON
return 0;
}
static char get_active_flag(struct drm_i915_gem_object *obj)
{
return obj->active ? '*' : ' ';
}
static char get_pin_flag(struct drm_i915_gem_object *obj)
{
return obj->pin_display ? 'p' : ' ';
}
static char get_tiling_flag(struct drm_i915_gem_object *obj)
{
switch (obj->tiling_mode) {
default:
case I915_TILING_NONE: return ' ';
case I915_TILING_X: return 'X';
case I915_TILING_Y: return 'Y';
}
}
static char get_global_flag(struct drm_i915_gem_object *obj)
{
return i915_gem_obj_to_ggtt(obj) ? 'g' : ' ';
}
static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
{
return obj->mapping ? 'M' : ' ';
}
static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
{
u64 size = 0;
struct i915_vma *vma;
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (vma->is_ggtt && drm_mm_node_allocated(&vma->node))
size += vma->node.size;
}
return size;
}
static void
describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
{
drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-27 19:41:17 +07:00
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct intel_engine_cs *engine;
struct i915_vma *vma;
int pin_count = 0;
enum intel_engine_id id;
drm/i915: Add struct_mutex locking for debugs/i915_gem_framebuffer Since describe_obj() looks at state guarded by the struct_mutex, we need to be holding it. [ 580.201054] drv_suspend: starting subtest debugfs-reader [ 580.239652] ------------[ cut here ]------------ [ 580.239696] WARNING: CPU: 0 PID: 920 at include/linux/list_check.h:25 describe_obj+0x419/0x440() [ 580.239725] CPU: 0 PID: 920 Comm: cat Not tainted 4.5.0-rc6+ #835 [ 580.239745] Hardware name: /NUC5CPYB, BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 580.239767] 0000000000000000 ffff88027554fcf8 ffffffff812c1135 0000000000000000 [ 580.239815] ffffffff8193dc42 ffff88027554fd30 ffffffff8107419d ffff880071727c00 [ 580.239858] ffff8802757d8000 ffffffff818f693c ffffffff818f693c ffff8802757b9048 [ 580.239896] Call Trace: [ 580.239917] [<ffffffff812c1135>] dump_stack+0x67/0x92 [ 580.239939] [<ffffffff8107419d>] warn_slowpath_common+0x7d/0xb0 [ 580.239959] [<ffffffff810742ba>] warn_slowpath_null+0x1a/0x20 [ 580.239981] [<ffffffff813ce579>] describe_obj+0x419/0x440 [ 580.240006] [<ffffffff813ced22>] i915_gem_framebuffer_info+0xa2/0x100 [ 580.240033] [<ffffffff811a9286>] seq_read+0xe6/0x3b0 [ 580.240059] [<ffffffff81182288>] __vfs_read+0x28/0xd0 [ 580.240085] [<ffffffff81173378>] ? SyS_fadvise64+0x228/0x2c0 [ 580.240112] [<ffffffff811823b2>] vfs_read+0x82/0x110 [ 580.240137] [<ffffffff811827d9>] SyS_read+0x49/0xa0 [ 580.240162] [<ffffffff815bac57>] entry_SYSCALL_64_fastpath+0x12/0x6b [ 580.240187] ---[ end trace 3e2cbf34576c9878 ]--- [ 580.281900] ------------[ cut here ]------------ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/1459689261-7920-1-git-send-email-chris@chris-wilson.co.uk
2016-04-03 20:14:20 +07:00
lockdep_assert_held(&obj->base.dev->struct_mutex);
seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x [ ",
&obj->base,
get_active_flag(obj),
get_pin_flag(obj),
get_tiling_flag(obj),
get_global_flag(obj),
get_pin_mapped_flag(obj),
obj->base.size / 1024,
obj->base.read_domains,
drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-27 19:41:17 +07:00
obj->base.write_domain);
for_each_engine_id(engine, dev_priv, id)
drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-27 19:41:17 +07:00
seq_printf(m, "%x ",
i915_gem_request_get_seqno(obj->last_read[id].request));
drm/i915: Implement inter-engine read-read optimisations Currently, we only track the last request globally across all engines. This prevents us from issuing concurrent read requests on e.g. the RCS and BCS engines (or more likely the render and media engines). Without semaphores, we incur costly stalls as we synchronise between rings - greatly impacting the current performance of Broadwell versus Haswell in certain workloads (like video decode). With the introduction of reference counted requests, it is much easier to track the last request per ring, as well as the last global write request so that we can optimise inter-engine read read requests (as well as better optimise certain CPU waits). v2: Fix inverted readonly condition for nonblocking waits. v3: Handle non-continguous engine array after waits v4: Rebase, tidy, rewrite ring list debugging v5: Use obj->active as a bitfield, it looks cool v6: Micro-optimise, mostly involving moving code around v7: Fix retire-requests-upto for execlists (and multiple rq->ringbuf) v8: Rebase v9: Refactor i915_gem_object_sync() to allow the compiler to better optimise it. Benchmark: igt/gem_read_read_speed hsw:gt3e (with semaphores): Before: Time to read-read 1024k: 275.794µs After: Time to read-read 1024k: 123.260µs hsw:gt3e (w/o semaphores): Before: Time to read-read 1024k: 230.433µs After: Time to read-read 1024k: 124.593µs bdw-u (w/o semaphores): Before After Time to read-read 1x1: 26.274µs 10.350µs Time to read-read 128x128: 40.097µs 21.366µs Time to read-read 256x256: 77.087µs 42.608µs Time to read-read 512x512: 281.999µs 181.155µs Time to read-read 1024x1024: 1196.141µs 1118.223µs Time to read-read 2048x2048: 5639.072µs 5225.837µs Time to read-read 4096x4096: 22401.662µs 21137.067µs Time to read-read 8192x8192: 89617.735µs 85637.681µs Testcase: igt/gem_concurrent_blit (read-read and friends) Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> [v8] [danvet: s/\<rq\>/req/g] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-27 19:41:17 +07:00
seq_printf(m, "] %x %x%s%s%s",
i915_gem_request_get_seqno(obj->last_write.request),
i915_gem_request_get_seqno(obj->last_fence.request),
i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
obj->dirty ? " dirty" : "",
obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
if (obj->base.name)
seq_printf(m, " (name: %d)", obj->base.name);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (vma->pin_count > 0)
pin_count++;
}
seq_printf(m, " (pinned x %d)", pin_count);
if (obj->pin_display)
seq_printf(m, " (display)");
if (obj->fence_reg != I915_FENCE_REG_NONE)
seq_printf(m, " (fence: %d)", obj->fence_reg);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
vma->is_ggtt ? "g" : "pp",
vma->node.start, vma->node.size);
if (vma->is_ggtt)
seq_printf(m, ", type: %u", vma->ggtt_view.type);
seq_puts(m, ")");
}
if (obj->stolen)
seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
if (obj->pin_display || obj->fault_mappable) {
char s[3], *t = s;
if (obj->pin_display)
*t++ = 'p';
if (obj->fault_mappable)
*t++ = 'f';
*t = '\0';
seq_printf(m, " (%s mappable)", s);
}
if (obj->last_write.request)
seq_printf(m, " (%s)", obj->last_write.request->engine->name);
if (obj->frontbuffer_bits)
seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
}
static int i915_gem_object_list_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
uintptr_t list = (uintptr_t) node->info_ent->data;
struct list_head *head;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
2013-08-01 07:00:14 +07:00
struct i915_vma *vma;
u64 total_obj_size, total_gtt_size;
int count, ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
2013-08-01 07:00:14 +07:00
/* FIXME: the user of this interface might want more than just GGTT */
switch (list) {
case ACTIVE_LIST:
seq_puts(m, "Active:\n");
head = &ggtt->base.active_list;
break;
case INACTIVE_LIST:
seq_puts(m, "Inactive:\n");
head = &ggtt->base.inactive_list;
break;
default:
mutex_unlock(&dev->struct_mutex);
return -EINVAL;
}
total_obj_size = total_gtt_size = count = 0;
list_for_each_entry(vma, head, vm_link) {
2013-08-01 07:00:14 +07:00
seq_printf(m, " ");
describe_obj(m, vma->obj);
seq_printf(m, "\n");
total_obj_size += vma->obj->base.size;
total_gtt_size += vma->node.size;
count++;
}
mutex_unlock(&dev->struct_mutex);
seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
count, total_obj_size, total_gtt_size);
return 0;
}
static int obj_rank_by_stolen(void *priv,
struct list_head *A, struct list_head *B)
{
struct drm_i915_gem_object *a =
container_of(A, struct drm_i915_gem_object, obj_exec_link);
struct drm_i915_gem_object *b =
container_of(B, struct drm_i915_gem_object, obj_exec_link);
if (a->stolen->start < b->stolen->start)
return -1;
if (a->stolen->start > b->stolen->start)
return 1;
return 0;
}
static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_object *obj;
u64 total_obj_size, total_gtt_size;
LIST_HEAD(stolen);
int count, ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
total_obj_size = total_gtt_size = count = 0;
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
if (obj->stolen == NULL)
continue;
list_add(&obj->obj_exec_link, &stolen);
total_obj_size += obj->base.size;
total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
count++;
}
list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
if (obj->stolen == NULL)
continue;
list_add(&obj->obj_exec_link, &stolen);
total_obj_size += obj->base.size;
count++;
}
list_sort(NULL, &stolen, obj_rank_by_stolen);
seq_puts(m, "Stolen:\n");
while (!list_empty(&stolen)) {
obj = list_first_entry(&stolen, typeof(*obj), obj_exec_link);
seq_puts(m, " ");
describe_obj(m, obj);
seq_putc(m, '\n');
list_del_init(&obj->obj_exec_link);
}
mutex_unlock(&dev->struct_mutex);
seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
count, total_obj_size, total_gtt_size);
return 0;
}
#define count_objects(list, member) do { \
list_for_each_entry(obj, list, member) { \
size += i915_gem_obj_total_ggtt_size(obj); \
++count; \
if (obj->map_and_fenceable) { \
mappable_size += i915_gem_obj_ggtt_size(obj); \
++mappable_count; \
} \
} \
} while (0)
struct file_stats {
struct drm_i915_file_private *file_priv;
unsigned long count;
u64 total, unbound;
u64 global, shared;
u64 active, inactive;
};
static int per_file_stats(int id, void *ptr, void *data)
{
struct drm_i915_gem_object *obj = ptr;
struct file_stats *stats = data;
struct i915_vma *vma;
stats->count++;
stats->total += obj->base.size;
if (!obj->bind_count)
stats->unbound += obj->base.size;
if (obj->base.name || obj->base.dma_buf)
stats->shared += obj->base.size;
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
if (vma->is_ggtt) {
stats->global += vma->node.size;
} else {
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm);
if (ppgtt->base.file != stats->file_priv)
continue;
}
if (obj->active) /* XXX per-vma statistic */
stats->active += vma->node.size;
else
stats->inactive += vma->node.size;
}
return 0;
}
#define print_file_stats(m, name, stats) do { \
if (stats.count) \
seq_printf(m, "%s: %lu objects, %llu bytes (%llu active, %llu inactive, %llu global, %llu shared, %llu unbound)\n", \
name, \
stats.count, \
stats.total, \
stats.active, \
stats.inactive, \
stats.global, \
stats.shared, \
stats.unbound); \
} while (0)
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 03:13:08 +07:00
static void print_batch_pool_stats(struct seq_file *m,
struct drm_i915_private *dev_priv)
{
struct drm_i915_gem_object *obj;
struct file_stats stats;
struct intel_engine_cs *engine;
int j;
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 03:13:08 +07:00
memset(&stats, 0, sizeof(stats));
for_each_engine(engine, dev_priv) {
for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) {
list_for_each_entry(obj,
&engine->batch_pool.cache_list[j],
batch_pool_link)
per_file_stats(0, obj, &stats);
}
}
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 03:13:08 +07:00
print_file_stats(m, "[k]batch pool", stats);
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 03:13:08 +07:00
}
static int per_file_ctx_stats(int id, void *ptr, void *data)
{
struct i915_gem_context *ctx = ptr;
int n;
for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) {
if (ctx->engine[n].state)
per_file_stats(0, ctx->engine[n].state, data);
if (ctx->engine[n].ring)
per_file_stats(0, ctx->engine[n].ring->obj, data);
}
return 0;
}
static void print_context_stats(struct seq_file *m,
struct drm_i915_private *dev_priv)
{
struct file_stats stats;
struct drm_file *file;
memset(&stats, 0, sizeof(stats));
mutex_lock(&dev_priv->drm.struct_mutex);
if (dev_priv->kernel_context)
per_file_ctx_stats(0, dev_priv->kernel_context, &stats);
list_for_each_entry(file, &dev_priv->drm.filelist, lhead) {
struct drm_i915_file_private *fpriv = file->driver_priv;
idr_for_each(&fpriv->context_idr, per_file_ctx_stats, &stats);
}
mutex_unlock(&dev_priv->drm.struct_mutex);
print_file_stats(m, "[k]contexts", stats);
}
2013-08-01 07:00:14 +07:00
#define count_vmas(list, member) do { \
list_for_each_entry(vma, list, member) { \
size += i915_gem_obj_total_ggtt_size(vma->obj); \
2013-08-01 07:00:14 +07:00
++count; \
if (vma->obj->map_and_fenceable) { \
mappable_size += i915_gem_obj_ggtt_size(vma->obj); \
++mappable_count; \
} \
} \
} while (0)
static int i915_gem_object_info(struct seq_file *m, void* data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
u32 count, mappable_count, purgeable_count;
u64 size, mappable_size, purgeable_size;
unsigned long pin_mapped_count = 0, pin_mapped_purgeable_count = 0;
u64 pin_mapped_size = 0, pin_mapped_purgeable_size = 0;
struct drm_i915_gem_object *obj;
struct drm_file *file;
2013-08-01 07:00:14 +07:00
struct i915_vma *vma;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
seq_printf(m, "%u objects, %zu bytes\n",
dev_priv->mm.object_count,
dev_priv->mm.object_memory);
size = count = mappable_size = mappable_count = 0;
count_objects(&dev_priv->mm.bound_list, global_list);
seq_printf(m, "%u [%u] objects, %llu [%llu] bytes in gtt\n",
count, mappable_count, size, mappable_size);
size = count = mappable_size = mappable_count = 0;
count_vmas(&ggtt->base.active_list, vm_link);
seq_printf(m, " %u [%u] active objects, %llu [%llu] bytes\n",
count, mappable_count, size, mappable_size);
size = count = mappable_size = mappable_count = 0;
count_vmas(&ggtt->base.inactive_list, vm_link);
seq_printf(m, " %u [%u] inactive objects, %llu [%llu] bytes\n",
count, mappable_count, size, mappable_size);
size = count = purgeable_size = purgeable_count = 0;
list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
drm/i915: Track unbound pages When dealing with a working set larger than the GATT, or even the mappable aperture when touching through the GTT, we end up with evicting objects only to rebind them at a new offset again later. Moving an object into and out of the GTT requires clflushing the pages, thus causing a double-clflush penalty for rebinding. To avoid having to clflush on rebinding, we can track the pages as they are evicted from the GTT and only relinquish those pages on memory pressure. As usual, if it were not for the handling of out-of-memory condition and having to manually shrink our own bo caches, it would be a net reduction of code. Alas. Note: The patch also contains a few changes to the last-hope evict_everything logic in i916_gem_execbuffer.c - we no longer try to only evict the purgeable stuff in a first try (since that's superflous and only helps in OOM corner-cases, not fragmented-gtt trashing situations). Also, the extraction of the get_pages retry loop from bind_to_gtt (and other callsites) to get_pages should imo have been a separate patch. v2: Ditch the newly added put_pages (for unbound objects only) in i915_gem_reset. A quick irc discussion hasn't revealed any important reason for this, so if we need this, I'd like to have a git blame'able explanation for it. v3: Undo the s/drm_malloc_ab/kmalloc/ in get_pages that Chris noticed. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Split out code movements and rant a bit in the commit message with a few Notes. Done v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-08-20 16:40:46 +07:00
size += obj->base.size, ++count;
if (obj->madv == I915_MADV_DONTNEED)
purgeable_size += obj->base.size, ++purgeable_count;
if (obj->mapping) {
pin_mapped_count++;
pin_mapped_size += obj->base.size;
if (obj->pages_pin_count == 0) {
pin_mapped_purgeable_count++;
pin_mapped_purgeable_size += obj->base.size;
}
}
}
seq_printf(m, "%u unbound objects, %llu bytes\n", count, size);
drm/i915: Track unbound pages When dealing with a working set larger than the GATT, or even the mappable aperture when touching through the GTT, we end up with evicting objects only to rebind them at a new offset again later. Moving an object into and out of the GTT requires clflushing the pages, thus causing a double-clflush penalty for rebinding. To avoid having to clflush on rebinding, we can track the pages as they are evicted from the GTT and only relinquish those pages on memory pressure. As usual, if it were not for the handling of out-of-memory condition and having to manually shrink our own bo caches, it would be a net reduction of code. Alas. Note: The patch also contains a few changes to the last-hope evict_everything logic in i916_gem_execbuffer.c - we no longer try to only evict the purgeable stuff in a first try (since that's superflous and only helps in OOM corner-cases, not fragmented-gtt trashing situations). Also, the extraction of the get_pages retry loop from bind_to_gtt (and other callsites) to get_pages should imo have been a separate patch. v2: Ditch the newly added put_pages (for unbound objects only) in i915_gem_reset. A quick irc discussion hasn't revealed any important reason for this, so if we need this, I'd like to have a git blame'able explanation for it. v3: Undo the s/drm_malloc_ab/kmalloc/ in get_pages that Chris noticed. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Split out code movements and rant a bit in the commit message with a few Notes. Done v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-08-20 16:40:46 +07:00
size = count = mappable_size = mappable_count = 0;
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
if (obj->fault_mappable) {
size += i915_gem_obj_ggtt_size(obj);
++count;
}
if (obj->pin_display) {
mappable_size += i915_gem_obj_ggtt_size(obj);
++mappable_count;
}
if (obj->madv == I915_MADV_DONTNEED) {
purgeable_size += obj->base.size;
++purgeable_count;
}
if (obj->mapping) {
pin_mapped_count++;
pin_mapped_size += obj->base.size;
if (obj->pages_pin_count == 0) {
pin_mapped_purgeable_count++;
pin_mapped_purgeable_size += obj->base.size;
}
}
}
seq_printf(m, "%u purgeable objects, %llu bytes\n",
purgeable_count, purgeable_size);
seq_printf(m, "%u pinned mappable objects, %llu bytes\n",
mappable_count, mappable_size);
seq_printf(m, "%u fault mappable objects, %llu bytes\n",
count, size);
seq_printf(m,
"%lu [%lu] pin mapped objects, %llu [%llu] bytes [purgeable]\n",
pin_mapped_count, pin_mapped_purgeable_count,
pin_mapped_size, pin_mapped_purgeable_size);
seq_printf(m, "%llu [%llu] gtt total\n",
ggtt->base.total, ggtt->mappable_end - ggtt->base.start);
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 03:13:08 +07:00
seq_putc(m, '\n');
print_batch_pool_stats(m, dev_priv);
mutex_unlock(&dev->struct_mutex);
mutex_lock(&dev->filelist_mutex);
print_context_stats(m, dev_priv);
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct file_stats stats;
struct task_struct *task;
memset(&stats, 0, sizeof(stats));
stats.file_priv = file->driver_priv;
drm/i915: Hold the table lock whilst walking the file's idr and counting the objects in debugfs Fixes an issue whereby we may race with the table updates (before the core takes the struct_mutex) and so risk dereferencing a stale pointer in the iterator for /debugfs/.../i915_gem_objects. For example, [ 1524.757545] BUG: unable to handle kernel paging request at f53af748 [ 1524.757572] IP: [<c1406982>] per_file_stats+0x12/0x100 [ 1524.757599] *pdpt = 0000000001b13001 *pde = 00000000379fb067 *pte = 80000000353af060 [ 1524.757621] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC [ 1524.757637] Modules linked in: ctr ccm arc4 ath9k ath9k_common ath9k_hw ath snd_hda_codec_conexant mac80211 snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec bnep snd_hwdep rfcomm snd_pcm gpio_ich dell_wmi sparse_keymap snd_seq_midi hid_multitouch uvcvideo snd_seq_midi_event dell_laptop snd_rawmidi dcdbas snd_seq videobuf2_vmalloc videobuf2_memops videobuf2_core usbhid videodev snd_seq_device coretemp snd_timer hid joydev kvm_intel cfg80211 ath3k kvm btusb bluetooth serio_raw snd microcode soundcore lpc_ich wmi mac_hid parport_pc ppdev lp parport psmouse ahci libahci [ 1524.757825] CPU: 3 PID: 1911 Comm: intel-gpu-overl Tainted: G W OE 3.15.0-rc3+ #96 [ 1524.757840] Hardware name: Dell Inc. Inspiron 1090/Inspiron 1090, BIOS A06 08/23/2011 [ 1524.757855] task: f52f36c0 ti: f4cbc000 task.ti: f4cbc000 [ 1524.757869] EIP: 0060:[<c1406982>] EFLAGS: 00210202 CPU: 3 [ 1524.757884] EIP is at per_file_stats+0x12/0x100 [ 1524.757896] EAX: 0000002d EBX: 00000000 ECX: f4cbdefc EDX: f53af700 [ 1524.757909] ESI: c1406970 EDI: f53af700 EBP: f4cbde6c ESP: f4cbde5c [ 1524.757922] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 1524.757934] CR0: 80050033 CR2: f53af748 CR3: 356af000 CR4: 000007f0 [ 1524.757945] Stack: [ 1524.757957] f4cbdefc 00000000 c1406970 f53af700 f4cbdea8 c12e5f15 f4cbdefc c1406970 [ 1524.757993] 0000ffff f4cbde90 0000002d f5dc5cd0 e4e80438 c1181d59 f4cbded8 f4d89900 [ 1524.758027] f5631b40 e5131074 c1903f37 f4cbdf28 c14068e6 f52648a0 c1927748 c1903f37 [ 1524.758062] Call Trace: [ 1524.758084] [<c1406970>] ? i915_gem_object_info+0x510/0x510 [ 1524.758106] [<c12e5f15>] idr_for_each+0xa5/0x100 [ 1524.758126] [<c1406970>] ? i915_gem_object_info+0x510/0x510 [ 1524.758148] [<c1181d59>] ? seq_vprintf+0x29/0x50 [ 1524.758168] [<c14068e6>] i915_gem_object_info+0x486/0x510 [ 1524.758189] [<c11823a6>] seq_read+0xd6/0x380 [ 1524.758208] [<c116d11d>] ? final_putname+0x1d/0x40 [ 1524.758227] [<c11822d0>] ? seq_hlist_next_percpu+0x90/0x90 [ 1524.758246] [<c1163e52>] vfs_read+0x82/0x150 [ 1524.758265] [<c11645d6>] SyS_read+0x46/0x90 [ 1524.758285] [<c16b8d8c>] sysenter_do_call+0x12/0x22 [ 1524.758298] Code: f5 8f 2a 00 83 c4 6c 31 c0 5b 5e 5f 5d c3 8d 74 26 00 8d bc 27 00 00 00 00 55 89 e5 57 56 53 83 ec 04 3e 8d 74 26 00 83 41 04 01 <8b> 42 48 01 41 08 8b 42 4c 89 d7 85 c0 75 07 8b 42 60 85 c0 74 [ 1524.758461] EIP: [<c1406982>] per_file_stats+0x12/0x100 SS:ESP 0068:f4cbde5c [ 1524.758485] CR2: 00000000f53af748 Reported-by: Sam Jansen <sam.jansen@starleaf.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Sam Jansen <sam.jansen@starleaf.com> Cc: stable@vger.kernel.org Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Jani Nikula <jani.nikula@intel.com>
2014-06-17 15:56:24 +07:00
spin_lock(&file->table_lock);
idr_for_each(&file->object_idr, per_file_stats, &stats);
drm/i915: Hold the table lock whilst walking the file's idr and counting the objects in debugfs Fixes an issue whereby we may race with the table updates (before the core takes the struct_mutex) and so risk dereferencing a stale pointer in the iterator for /debugfs/.../i915_gem_objects. For example, [ 1524.757545] BUG: unable to handle kernel paging request at f53af748 [ 1524.757572] IP: [<c1406982>] per_file_stats+0x12/0x100 [ 1524.757599] *pdpt = 0000000001b13001 *pde = 00000000379fb067 *pte = 80000000353af060 [ 1524.757621] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC [ 1524.757637] Modules linked in: ctr ccm arc4 ath9k ath9k_common ath9k_hw ath snd_hda_codec_conexant mac80211 snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec bnep snd_hwdep rfcomm snd_pcm gpio_ich dell_wmi sparse_keymap snd_seq_midi hid_multitouch uvcvideo snd_seq_midi_event dell_laptop snd_rawmidi dcdbas snd_seq videobuf2_vmalloc videobuf2_memops videobuf2_core usbhid videodev snd_seq_device coretemp snd_timer hid joydev kvm_intel cfg80211 ath3k kvm btusb bluetooth serio_raw snd microcode soundcore lpc_ich wmi mac_hid parport_pc ppdev lp parport psmouse ahci libahci [ 1524.757825] CPU: 3 PID: 1911 Comm: intel-gpu-overl Tainted: G W OE 3.15.0-rc3+ #96 [ 1524.757840] Hardware name: Dell Inc. Inspiron 1090/Inspiron 1090, BIOS A06 08/23/2011 [ 1524.757855] task: f52f36c0 ti: f4cbc000 task.ti: f4cbc000 [ 1524.757869] EIP: 0060:[<c1406982>] EFLAGS: 00210202 CPU: 3 [ 1524.757884] EIP is at per_file_stats+0x12/0x100 [ 1524.757896] EAX: 0000002d EBX: 00000000 ECX: f4cbdefc EDX: f53af700 [ 1524.757909] ESI: c1406970 EDI: f53af700 EBP: f4cbde6c ESP: f4cbde5c [ 1524.757922] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 1524.757934] CR0: 80050033 CR2: f53af748 CR3: 356af000 CR4: 000007f0 [ 1524.757945] Stack: [ 1524.757957] f4cbdefc 00000000 c1406970 f53af700 f4cbdea8 c12e5f15 f4cbdefc c1406970 [ 1524.757993] 0000ffff f4cbde90 0000002d f5dc5cd0 e4e80438 c1181d59 f4cbded8 f4d89900 [ 1524.758027] f5631b40 e5131074 c1903f37 f4cbdf28 c14068e6 f52648a0 c1927748 c1903f37 [ 1524.758062] Call Trace: [ 1524.758084] [<c1406970>] ? i915_gem_object_info+0x510/0x510 [ 1524.758106] [<c12e5f15>] idr_for_each+0xa5/0x100 [ 1524.758126] [<c1406970>] ? i915_gem_object_info+0x510/0x510 [ 1524.758148] [<c1181d59>] ? seq_vprintf+0x29/0x50 [ 1524.758168] [<c14068e6>] i915_gem_object_info+0x486/0x510 [ 1524.758189] [<c11823a6>] seq_read+0xd6/0x380 [ 1524.758208] [<c116d11d>] ? final_putname+0x1d/0x40 [ 1524.758227] [<c11822d0>] ? seq_hlist_next_percpu+0x90/0x90 [ 1524.758246] [<c1163e52>] vfs_read+0x82/0x150 [ 1524.758265] [<c11645d6>] SyS_read+0x46/0x90 [ 1524.758285] [<c16b8d8c>] sysenter_do_call+0x12/0x22 [ 1524.758298] Code: f5 8f 2a 00 83 c4 6c 31 c0 5b 5e 5f 5d c3 8d 74 26 00 8d bc 27 00 00 00 00 55 89 e5 57 56 53 83 ec 04 3e 8d 74 26 00 83 41 04 01 <8b> 42 48 01 41 08 8b 42 4c 89 d7 85 c0 75 07 8b 42 60 85 c0 74 [ 1524.758461] EIP: [<c1406982>] per_file_stats+0x12/0x100 SS:ESP 0068:f4cbde5c [ 1524.758485] CR2: 00000000f53af748 Reported-by: Sam Jansen <sam.jansen@starleaf.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Sam Jansen <sam.jansen@starleaf.com> Cc: stable@vger.kernel.org Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Jani Nikula <jani.nikula@intel.com>
2014-06-17 15:56:24 +07:00
spin_unlock(&file->table_lock);
/*
* Although we have a valid reference on file->pid, that does
* not guarantee that the task_struct who called get_pid() is
* still alive (e.g. get_pid(current) => fork() => exit()).
* Therefore, we need to protect this ->comm access using RCU.
*/
rcu_read_lock();
task = pid_task(file->pid, PIDTYPE_PID);
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 03:13:08 +07:00
print_file_stats(m, task ? task->comm : "<unknown>", stats);
rcu_read_unlock();
}
mutex_unlock(&dev->filelist_mutex);
return 0;
}
static int i915_gem_gtt_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
uintptr_t list = (uintptr_t) node->info_ent->data;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_object *obj;
u64 total_obj_size, total_gtt_size;
int count, ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
total_obj_size = total_gtt_size = count = 0;
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
if (list == PINNED_LIST && !i915_gem_obj_is_pinned(obj))
continue;
seq_puts(m, " ");
describe_obj(m, obj);
seq_putc(m, '\n');
total_obj_size += obj->base.size;
total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
count++;
}
mutex_unlock(&dev->struct_mutex);
seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
count, total_obj_size, total_gtt_size);
return 0;
}
static int i915_gem_pageflip_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *crtc;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
for_each_intel_crtc(dev, crtc) {
const char pipe = pipe_name(crtc->pipe);
const char plane = plane_name(crtc->plane);
struct intel_flip_work *work;
spin_lock_irq(&dev->event_lock);
drm/i915: Revert async unpin and nonblocking atomic commit This reverts the following patches: d55dbd06bb5e1399aba9ab5227465339d1bbefff drm/i915: Allow nonblocking update of pageflips. 15c86bdb760185e871c7a0f559978328aa500971 drm/i915: Check for unpin correctness. 95c2ccdc82d520f59ae3b6fdc097b63c9b7082bb Reapply "drm/i915: Avoid stalling on pending flips for legacy cursor updates" a6747b7304a9d66758a196d885dab8bbfa5e7d1f drm/i915: Make unpin async. 03f476e1fcb42fca88fc50b94b0d3adbdbe887f0 drm/i915: Prepare connectors for nonblocking checks. 2099deffef4404f949ba1b68d2b17e0608190bc2 drm/i915: Pass atomic states to fbc update functions. ee7171af72c39c18b7d7571419a4ac6ca30aea66 drm/i915: Remove reset_counter from intel_crtc. 2ee004f7c59b2e642f0bb2834f847d756f2dd7b7 drm/i915: Remove queue_flip pointer. b8d2afae557dbb9b9c7bc6f6ec4f5278f3c4c34e drm/i915: Remove use_mmio_flip kernel parameter. 8dd634d922615ec3a9af7976029110ec037f8b50 drm/i915: Remove cs based page flip support. 143f73b3bf48c089b40f58462dd7f7c199fd4f0f drm/i915: Rework intel_crtc_page_flip to be almost atomic, v3. 84fc494b64e8c591be446a966b7447a9db519c88 drm/i915: Add the exclusive fence to plane_state. 6885843ae164e11f6c802209d06921e678a3f3f3 drm/i915: Convert flip_work to a list. aa420ddd8eeaa5df579894a412289e4d07c2fee9 drm/i915: Allow mmio updates on all platforms, v2. afee4d8707ab1f21b7668de995be3a5961e83582 Revert "drm/i915: Avoid stalling on pending flips for legacy cursor updates" "drm/i915: Allow nonblocking update of pageflips" should have been split up, misses a proper commit message and seems to cause issues in the legacy page_flip path as demonstrated by kms_flip. "drm/i915: Make unpin async" doesn't handle the unthrottled cursor updates correctly, leading to an apparent pin count leak. This is caught by the WARN_ON in i915_gem_object_do_pin which screams if we have more than DRM_I915_GEM_OBJECT_MAX_PIN_COUNT pins. Unfortuantely we can't just revert these two because this patch series came with a built-in bisect breakage in the form of temporarily removing the unthrottled cursor update hack for legacy cursor ioctl. Therefore there's no other option than to revert the entire pile :( There's one tiny conflict in intel_drv.h due to other patches, nothing serious. Normally I'd wait a bit longer with doing a maintainer revert, but since the minimal set of patches we need to revert (due to the bisect breakage) is so big, time is running out fast. And very soon (especially after a few attempts at fixing issues) it'll be really hard to revert things cleanly. Lessons learned: - Not a good idea to rush the review (done by someone fairly new to the area) and not make sure domain experts had a chance to read it. - Patches should be properly split up. I only looked at the two patches that should be reverted in detail, but both look like the mix up different things in one patch. - Patches really should have proper commit messages. Especially when doing more than one thing, and especially when touching critical and tricky core code. - Building a patch series and r-b stamping it when it has a built-in bisect breakage is not a good idea. - I also think we need to stop building up technical debt by postponing atomic igt testcases even longer. I think it's clear that there's enough corner cases in this beast that we really need to have the testcases _before_ the next step lands. Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: Patrik Jakobsson <patrik.jakobsson@linux.intel.com> Cc: John Harrison <John.C.Harrison@Intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Acked-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Jani Nikula <jani.nikula@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
2016-05-24 22:13:53 +07:00
work = crtc->flip_work;
if (work == NULL) {
seq_printf(m, "No flip due on pipe %c (plane %c)\n",
pipe, plane);
} else {
drm/i915: Revert async unpin and nonblocking atomic commit This reverts the following patches: d55dbd06bb5e1399aba9ab5227465339d1bbefff drm/i915: Allow nonblocking update of pageflips. 15c86bdb760185e871c7a0f559978328aa500971 drm/i915: Check for unpin correctness. 95c2ccdc82d520f59ae3b6fdc097b63c9b7082bb Reapply "drm/i915: Avoid stalling on pending flips for legacy cursor updates" a6747b7304a9d66758a196d885dab8bbfa5e7d1f drm/i915: Make unpin async. 03f476e1fcb42fca88fc50b94b0d3adbdbe887f0 drm/i915: Prepare connectors for nonblocking checks. 2099deffef4404f949ba1b68d2b17e0608190bc2 drm/i915: Pass atomic states to fbc update functions. ee7171af72c39c18b7d7571419a4ac6ca30aea66 drm/i915: Remove reset_counter from intel_crtc. 2ee004f7c59b2e642f0bb2834f847d756f2dd7b7 drm/i915: Remove queue_flip pointer. b8d2afae557dbb9b9c7bc6f6ec4f5278f3c4c34e drm/i915: Remove use_mmio_flip kernel parameter. 8dd634d922615ec3a9af7976029110ec037f8b50 drm/i915: Remove cs based page flip support. 143f73b3bf48c089b40f58462dd7f7c199fd4f0f drm/i915: Rework intel_crtc_page_flip to be almost atomic, v3. 84fc494b64e8c591be446a966b7447a9db519c88 drm/i915: Add the exclusive fence to plane_state. 6885843ae164e11f6c802209d06921e678a3f3f3 drm/i915: Convert flip_work to a list. aa420ddd8eeaa5df579894a412289e4d07c2fee9 drm/i915: Allow mmio updates on all platforms, v2. afee4d8707ab1f21b7668de995be3a5961e83582 Revert "drm/i915: Avoid stalling on pending flips for legacy cursor updates" "drm/i915: Allow nonblocking update of pageflips" should have been split up, misses a proper commit message and seems to cause issues in the legacy page_flip path as demonstrated by kms_flip. "drm/i915: Make unpin async" doesn't handle the unthrottled cursor updates correctly, leading to an apparent pin count leak. This is caught by the WARN_ON in i915_gem_object_do_pin which screams if we have more than DRM_I915_GEM_OBJECT_MAX_PIN_COUNT pins. Unfortuantely we can't just revert these two because this patch series came with a built-in bisect breakage in the form of temporarily removing the unthrottled cursor update hack for legacy cursor ioctl. Therefore there's no other option than to revert the entire pile :( There's one tiny conflict in intel_drv.h due to other patches, nothing serious. Normally I'd wait a bit longer with doing a maintainer revert, but since the minimal set of patches we need to revert (due to the bisect breakage) is so big, time is running out fast. And very soon (especially after a few attempts at fixing issues) it'll be really hard to revert things cleanly. Lessons learned: - Not a good idea to rush the review (done by someone fairly new to the area) and not make sure domain experts had a chance to read it. - Patches should be properly split up. I only looked at the two patches that should be reverted in detail, but both look like the mix up different things in one patch. - Patches really should have proper commit messages. Especially when doing more than one thing, and especially when touching critical and tricky core code. - Building a patch series and r-b stamping it when it has a built-in bisect breakage is not a good idea. - I also think we need to stop building up technical debt by postponing atomic igt testcases even longer. I think it's clear that there's enough corner cases in this beast that we really need to have the testcases _before_ the next step lands. Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: Patrik Jakobsson <patrik.jakobsson@linux.intel.com> Cc: John Harrison <John.C.Harrison@Intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Acked-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Jani Nikula <jani.nikula@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
2016-05-24 22:13:53 +07:00
u32 pending;
u32 addr;
pending = atomic_read(&work->pending);
if (pending) {
seq_printf(m, "Flip ioctl preparing on pipe %c (plane %c)\n",
pipe, plane);
} else {
seq_printf(m, "Flip pending (waiting for vsync) on pipe %c (plane %c)\n",
pipe, plane);
}
if (work->flip_queued_req) {
struct intel_engine_cs *engine = i915_gem_request_get_engine(work->flip_queued_req);
seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n",
engine->name,
i915_gem_request_get_seqno(work->flip_queued_req),
dev_priv->next_seqno,
intel_engine_get_seqno(engine),
i915_gem_request_completed(work->flip_queued_req));
drm/i915: Revert async unpin and nonblocking atomic commit This reverts the following patches: d55dbd06bb5e1399aba9ab5227465339d1bbefff drm/i915: Allow nonblocking update of pageflips. 15c86bdb760185e871c7a0f559978328aa500971 drm/i915: Check for unpin correctness. 95c2ccdc82d520f59ae3b6fdc097b63c9b7082bb Reapply "drm/i915: Avoid stalling on pending flips for legacy cursor updates" a6747b7304a9d66758a196d885dab8bbfa5e7d1f drm/i915: Make unpin async. 03f476e1fcb42fca88fc50b94b0d3adbdbe887f0 drm/i915: Prepare connectors for nonblocking checks. 2099deffef4404f949ba1b68d2b17e0608190bc2 drm/i915: Pass atomic states to fbc update functions. ee7171af72c39c18b7d7571419a4ac6ca30aea66 drm/i915: Remove reset_counter from intel_crtc. 2ee004f7c59b2e642f0bb2834f847d756f2dd7b7 drm/i915: Remove queue_flip pointer. b8d2afae557dbb9b9c7bc6f6ec4f5278f3c4c34e drm/i915: Remove use_mmio_flip kernel parameter. 8dd634d922615ec3a9af7976029110ec037f8b50 drm/i915: Remove cs based page flip support. 143f73b3bf48c089b40f58462dd7f7c199fd4f0f drm/i915: Rework intel_crtc_page_flip to be almost atomic, v3. 84fc494b64e8c591be446a966b7447a9db519c88 drm/i915: Add the exclusive fence to plane_state. 6885843ae164e11f6c802209d06921e678a3f3f3 drm/i915: Convert flip_work to a list. aa420ddd8eeaa5df579894a412289e4d07c2fee9 drm/i915: Allow mmio updates on all platforms, v2. afee4d8707ab1f21b7668de995be3a5961e83582 Revert "drm/i915: Avoid stalling on pending flips for legacy cursor updates" "drm/i915: Allow nonblocking update of pageflips" should have been split up, misses a proper commit message and seems to cause issues in the legacy page_flip path as demonstrated by kms_flip. "drm/i915: Make unpin async" doesn't handle the unthrottled cursor updates correctly, leading to an apparent pin count leak. This is caught by the WARN_ON in i915_gem_object_do_pin which screams if we have more than DRM_I915_GEM_OBJECT_MAX_PIN_COUNT pins. Unfortuantely we can't just revert these two because this patch series came with a built-in bisect breakage in the form of temporarily removing the unthrottled cursor update hack for legacy cursor ioctl. Therefore there's no other option than to revert the entire pile :( There's one tiny conflict in intel_drv.h due to other patches, nothing serious. Normally I'd wait a bit longer with doing a maintainer revert, but since the minimal set of patches we need to revert (due to the bisect breakage) is so big, time is running out fast. And very soon (especially after a few attempts at fixing issues) it'll be really hard to revert things cleanly. Lessons learned: - Not a good idea to rush the review (done by someone fairly new to the area) and not make sure domain experts had a chance to read it. - Patches should be properly split up. I only looked at the two patches that should be reverted in detail, but both look like the mix up different things in one patch. - Patches really should have proper commit messages. Especially when doing more than one thing, and especially when touching critical and tricky core code. - Building a patch series and r-b stamping it when it has a built-in bisect breakage is not a good idea. - I also think we need to stop building up technical debt by postponing atomic igt testcases even longer. I think it's clear that there's enough corner cases in this beast that we really need to have the testcases _before_ the next step lands. Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: Patrik Jakobsson <patrik.jakobsson@linux.intel.com> Cc: John Harrison <John.C.Harrison@Intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Acked-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Acked-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Jani Nikula <jani.nikula@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
2016-05-24 22:13:53 +07:00
} else
seq_printf(m, "Flip not associated with any ring\n");
seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n",
work->flip_queued_vblank,
work->flip_ready_vblank,
intel_crtc_get_vblank_counter(crtc));
seq_printf(m, "%d prepares\n", atomic_read(&work->pending));
if (INTEL_INFO(dev)->gen >= 4)
addr = I915_HI_DISPBASE(I915_READ(DSPSURF(crtc->plane)));
else
addr = I915_READ(DSPADDR(crtc->plane));
seq_printf(m, "Current scanout address 0x%08x\n", addr);
if (work->pending_flip_obj) {
seq_printf(m, "New framebuffer address 0x%08lx\n", (long)work->gtt_offset);
seq_printf(m, "MMIO update completed? %d\n", addr == work->gtt_offset);
}
}
spin_unlock_irq(&dev->event_lock);
}
mutex_unlock(&dev->struct_mutex);
return 0;
}
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 03:13:08 +07:00
static int i915_gem_batch_pool_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 03:13:08 +07:00
struct drm_i915_gem_object *obj;
struct intel_engine_cs *engine;
int total = 0;
int ret, j;
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 03:13:08 +07:00
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
for_each_engine(engine, dev_priv) {
for (j = 0; j < ARRAY_SIZE(engine->batch_pool.cache_list); j++) {
int count;
count = 0;
list_for_each_entry(obj,
&engine->batch_pool.cache_list[j],
batch_pool_link)
count++;
seq_printf(m, "%s cache[%d]: %d objects\n",
engine->name, j, count);
list_for_each_entry(obj,
&engine->batch_pool.cache_list[j],
batch_pool_link) {
seq_puts(m, " ");
describe_obj(m, obj);
seq_putc(m, '\n');
}
total += count;
}
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 03:13:08 +07:00
}
seq_printf(m, "total: %d\n", total);
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 03:13:08 +07:00
mutex_unlock(&dev->struct_mutex);
return 0;
}
static int i915_gem_request_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
struct drm_i915_gem_request *req;
int ret, any;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
any = 0;
for_each_engine(engine, dev_priv) {
int count;
count = 0;
list_for_each_entry(req, &engine->request_list, list)
count++;
if (count == 0)
continue;
seq_printf(m, "%s requests: %d\n", engine->name, count);
list_for_each_entry(req, &engine->request_list, list) {
struct task_struct *task;
rcu_read_lock();
task = NULL;
if (req->pid)
task = pid_task(req->pid, PIDTYPE_PID);
seq_printf(m, " %x @ %d: %s [%d]\n",
req->fence.seqno,
(int) (jiffies - req->emitted_jiffies),
task ? task->comm : "<unknown>",
task ? task->pid : -1);
rcu_read_unlock();
}
any++;
}
mutex_unlock(&dev->struct_mutex);
if (any == 0)
seq_puts(m, "No requests\n");
return 0;
}
static void i915_ring_seqno_info(struct seq_file *m,
struct intel_engine_cs *engine)
{
drm/i915: Slaughter the thundering i915_wait_request herd One particularly stressful scenario consists of many independent tasks all competing for GPU time and waiting upon the results (e.g. realtime transcoding of many, many streams). One bottleneck in particular is that each client waits on its own results, but every client is woken up after every batchbuffer - hence the thunder of hooves as then every client must do its heavyweight dance to read a coherent seqno to see if it is the lucky one. Ideally, we only want one client to wake up after the interrupt and check its request for completion. Since the requests must retire in order, we can select the first client on the oldest request to be woken. Once that client has completed his wait, we can then wake up the next client and so on. However, all clients then incur latency as every process in the chain may be delayed for scheduling - this may also then cause some priority inversion. To reduce the latency, when a client is added or removed from the list, we scan the tree for completed seqno and wake up all the completed waiters in parallel. Using igt/benchmarks/gem_latency, we can demonstrate this effect. The benchmark measures the number of GPU cycles between completion of a batch and the client waking up from a call to wait-ioctl. With many concurrent waiters, with each on a different request, we observe that the wakeup latency before the patch scales nearly linearly with the number of waiters (before external factors kick in making the scaling much worse). After applying the patch, we can see that only the single waiter for the request is being woken up, providing a constant wakeup latency for every operation. However, the situation is not quite as rosy for many waiters on the same request, though to the best of my knowledge this is much less likely in practice. Here, we can observe that the concurrent waiters incur extra latency from being woken up by the solitary bottom-half, rather than directly by the interrupt. This appears to be scheduler induced (having discounted adverse effects from having a rbtree walk/erase in the wakeup path), each additional wake_up_process() costs approximately 1us on big core. Another effect of performing the secondary wakeups from the first bottom-half is the incurred delay this imposes on high priority threads - rather than immediately returning to userspace and leaving the interrupt handler to wake the others. To offset the delay incurred with additional waiters on a request, we could use a hybrid scheme that did a quick read in the interrupt handler and dequeued all the completed waiters (incurring the overhead in the interrupt handler, not the best plan either as we then incur GPU submission latency) but we would still have to wake up the bottom-half every time to do the heavyweight slow read. Or we could only kick the waiters on the seqno with the same priority as the current task (i.e. in the realtime waiter scenario, only it is woken up immediately by the interrupt and simply queues the next waiter before returning to userspace, minimising its delay at the expense of the chain, and also reducing contention on its scheduler runqueue). This is effective at avoid long pauses in the interrupt handler and at avoiding the extra latency in realtime/high-priority waiters. v2: Convert from a kworker per engine into a dedicated kthread for the bottom-half. v3: Rename request members and tweak comments. v4: Use a per-engine spinlock in the breadcrumbs bottom-half. v5: Fix race in locklessly checking waiter status and kicking the task on adding a new waiter. v6: Fix deciding when to force the timer to hide missing interrupts. v7: Move the bottom-half from the kthread to the first client process. v8: Reword a few comments v9: Break the busy loop when the interrupt is unmasked or has fired. v10: Comments, unnecessary churn, better debugging from Tvrtko v11: Wake all completed waiters on removing the current bottom-half to reduce the latency of waking up a herd of clients all waiting on the same request. v12: Rearrange missed-interrupt fault injection so that it works with igt/drv_missed_irq_hang v13: Rename intel_breadcrumb and friends to intel_wait in preparation for signal handling. v14: RCU commentary, assert_spin_locked v15: Hide BUG_ON behind the compiler; report on gem_latency findings. v16: Sort seqno-groups by priority so that first-waiter has the highest task priority (and so avoid priority inversion). v17: Add waiters to post-mortem GPU hang state. v18: Return early for a completed wait after acquiring the spinlock. Avoids adding ourselves to the tree if the is already complete, and skips the awkward question of why we don't do completion wakeups for waits earlier than or equal to ourselves. v19: Prepare for init_breadcrumbs to fail. Later patches may want to allocate during init, so be prepared to propagate back the error code. Testcase: igt/gem_concurrent_blit Testcase: igt/benchmarks/gem_latency Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com> Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Dave Gordon <david.s.gordon@intel.com> Cc: "Goel, Akash" <akash.goel@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> #v18 Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-6-git-send-email-chris@chris-wilson.co.uk
2016-07-01 23:23:15 +07:00
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct rb_node *rb;
seq_printf(m, "Current sequence (%s): %x\n",
engine->name, intel_engine_get_seqno(engine));
seq_printf(m, "Current user interrupts (%s): %lx\n",
engine->name, READ_ONCE(engine->breadcrumbs.irq_wakeups));
drm/i915: Slaughter the thundering i915_wait_request herd One particularly stressful scenario consists of many independent tasks all competing for GPU time and waiting upon the results (e.g. realtime transcoding of many, many streams). One bottleneck in particular is that each client waits on its own results, but every client is woken up after every batchbuffer - hence the thunder of hooves as then every client must do its heavyweight dance to read a coherent seqno to see if it is the lucky one. Ideally, we only want one client to wake up after the interrupt and check its request for completion. Since the requests must retire in order, we can select the first client on the oldest request to be woken. Once that client has completed his wait, we can then wake up the next client and so on. However, all clients then incur latency as every process in the chain may be delayed for scheduling - this may also then cause some priority inversion. To reduce the latency, when a client is added or removed from the list, we scan the tree for completed seqno and wake up all the completed waiters in parallel. Using igt/benchmarks/gem_latency, we can demonstrate this effect. The benchmark measures the number of GPU cycles between completion of a batch and the client waking up from a call to wait-ioctl. With many concurrent waiters, with each on a different request, we observe that the wakeup latency before the patch scales nearly linearly with the number of waiters (before external factors kick in making the scaling much worse). After applying the patch, we can see that only the single waiter for the request is being woken up, providing a constant wakeup latency for every operation. However, the situation is not quite as rosy for many waiters on the same request, though to the best of my knowledge this is much less likely in practice. Here, we can observe that the concurrent waiters incur extra latency from being woken up by the solitary bottom-half, rather than directly by the interrupt. This appears to be scheduler induced (having discounted adverse effects from having a rbtree walk/erase in the wakeup path), each additional wake_up_process() costs approximately 1us on big core. Another effect of performing the secondary wakeups from the first bottom-half is the incurred delay this imposes on high priority threads - rather than immediately returning to userspace and leaving the interrupt handler to wake the others. To offset the delay incurred with additional waiters on a request, we could use a hybrid scheme that did a quick read in the interrupt handler and dequeued all the completed waiters (incurring the overhead in the interrupt handler, not the best plan either as we then incur GPU submission latency) but we would still have to wake up the bottom-half every time to do the heavyweight slow read. Or we could only kick the waiters on the seqno with the same priority as the current task (i.e. in the realtime waiter scenario, only it is woken up immediately by the interrupt and simply queues the next waiter before returning to userspace, minimising its delay at the expense of the chain, and also reducing contention on its scheduler runqueue). This is effective at avoid long pauses in the interrupt handler and at avoiding the extra latency in realtime/high-priority waiters. v2: Convert from a kworker per engine into a dedicated kthread for the bottom-half. v3: Rename request members and tweak comments. v4: Use a per-engine spinlock in the breadcrumbs bottom-half. v5: Fix race in locklessly checking waiter status and kicking the task on adding a new waiter. v6: Fix deciding when to force the timer to hide missing interrupts. v7: Move the bottom-half from the kthread to the first client process. v8: Reword a few comments v9: Break the busy loop when the interrupt is unmasked or has fired. v10: Comments, unnecessary churn, better debugging from Tvrtko v11: Wake all completed waiters on removing the current bottom-half to reduce the latency of waking up a herd of clients all waiting on the same request. v12: Rearrange missed-interrupt fault injection so that it works with igt/drv_missed_irq_hang v13: Rename intel_breadcrumb and friends to intel_wait in preparation for signal handling. v14: RCU commentary, assert_spin_locked v15: Hide BUG_ON behind the compiler; report on gem_latency findings. v16: Sort seqno-groups by priority so that first-waiter has the highest task priority (and so avoid priority inversion). v17: Add waiters to post-mortem GPU hang state. v18: Return early for a completed wait after acquiring the spinlock. Avoids adding ourselves to the tree if the is already complete, and skips the awkward question of why we don't do completion wakeups for waits earlier than or equal to ourselves. v19: Prepare for init_breadcrumbs to fail. Later patches may want to allocate during init, so be prepared to propagate back the error code. Testcase: igt/gem_concurrent_blit Testcase: igt/benchmarks/gem_latency Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com> Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Dave Gordon <david.s.gordon@intel.com> Cc: "Goel, Akash" <akash.goel@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> #v18 Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-6-git-send-email-chris@chris-wilson.co.uk
2016-07-01 23:23:15 +07:00
spin_lock(&b->lock);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
struct intel_wait *w = container_of(rb, typeof(*w), node);
seq_printf(m, "Waiting (%s): %s [%d] on %x\n",
engine->name, w->tsk->comm, w->tsk->pid, w->seqno);
}
spin_unlock(&b->lock);
}
static int i915_gem_seqno_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
for_each_engine(engine, dev_priv)
i915_ring_seqno_info(m, engine);
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
return 0;
}
static int i915_interrupt_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
int ret, i, pipe;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
if (IS_CHERRYVIEW(dev)) {
seq_printf(m, "Master Interrupt Control:\t%08x\n",
I915_READ(GEN8_MASTER_IRQ));
seq_printf(m, "Display IER:\t%08x\n",
I915_READ(VLV_IER));
seq_printf(m, "Display IIR:\t%08x\n",
I915_READ(VLV_IIR));
seq_printf(m, "Display IIR_RW:\t%08x\n",
I915_READ(VLV_IIR_RW));
seq_printf(m, "Display IMR:\t%08x\n",
I915_READ(VLV_IMR));
for_each_pipe(dev_priv, pipe)
seq_printf(m, "Pipe %c stat:\t%08x\n",
pipe_name(pipe),
I915_READ(PIPESTAT(pipe)));
seq_printf(m, "Port hotplug:\t%08x\n",
I915_READ(PORT_HOTPLUG_EN));
seq_printf(m, "DPFLIPSTAT:\t%08x\n",
I915_READ(VLV_DPFLIPSTAT));
seq_printf(m, "DPINVGTT:\t%08x\n",
I915_READ(DPINVGTT));
for (i = 0; i < 4; i++) {
seq_printf(m, "GT Interrupt IMR %d:\t%08x\n",
i, I915_READ(GEN8_GT_IMR(i)));
seq_printf(m, "GT Interrupt IIR %d:\t%08x\n",
i, I915_READ(GEN8_GT_IIR(i)));
seq_printf(m, "GT Interrupt IER %d:\t%08x\n",
i, I915_READ(GEN8_GT_IER(i)));
}
seq_printf(m, "PCU interrupt mask:\t%08x\n",
I915_READ(GEN8_PCU_IMR));
seq_printf(m, "PCU interrupt identity:\t%08x\n",
I915_READ(GEN8_PCU_IIR));
seq_printf(m, "PCU interrupt enable:\t%08x\n",
I915_READ(GEN8_PCU_IER));
} else if (INTEL_INFO(dev)->gen >= 8) {
seq_printf(m, "Master Interrupt Control:\t%08x\n",
I915_READ(GEN8_MASTER_IRQ));
for (i = 0; i < 4; i++) {
seq_printf(m, "GT Interrupt IMR %d:\t%08x\n",
i, I915_READ(GEN8_GT_IMR(i)));
seq_printf(m, "GT Interrupt IIR %d:\t%08x\n",
i, I915_READ(GEN8_GT_IIR(i)));
seq_printf(m, "GT Interrupt IER %d:\t%08x\n",
i, I915_READ(GEN8_GT_IER(i)));
}
for_each_pipe(dev_priv, pipe) {
enum intel_display_power_domain power_domain;
power_domain = POWER_DOMAIN_PIPE(pipe);
if (!intel_display_power_get_if_enabled(dev_priv,
power_domain)) {
seq_printf(m, "Pipe %c power disabled\n",
pipe_name(pipe));
continue;
}
seq_printf(m, "Pipe %c IMR:\t%08x\n",
pipe_name(pipe),
I915_READ(GEN8_DE_PIPE_IMR(pipe)));
seq_printf(m, "Pipe %c IIR:\t%08x\n",
pipe_name(pipe),
I915_READ(GEN8_DE_PIPE_IIR(pipe)));
seq_printf(m, "Pipe %c IER:\t%08x\n",
pipe_name(pipe),
I915_READ(GEN8_DE_PIPE_IER(pipe)));
intel_display_power_put(dev_priv, power_domain);
}
seq_printf(m, "Display Engine port interrupt mask:\t%08x\n",
I915_READ(GEN8_DE_PORT_IMR));
seq_printf(m, "Display Engine port interrupt identity:\t%08x\n",
I915_READ(GEN8_DE_PORT_IIR));
seq_printf(m, "Display Engine port interrupt enable:\t%08x\n",
I915_READ(GEN8_DE_PORT_IER));
seq_printf(m, "Display Engine misc interrupt mask:\t%08x\n",
I915_READ(GEN8_DE_MISC_IMR));
seq_printf(m, "Display Engine misc interrupt identity:\t%08x\n",
I915_READ(GEN8_DE_MISC_IIR));
seq_printf(m, "Display Engine misc interrupt enable:\t%08x\n",
I915_READ(GEN8_DE_MISC_IER));
seq_printf(m, "PCU interrupt mask:\t%08x\n",
I915_READ(GEN8_PCU_IMR));
seq_printf(m, "PCU interrupt identity:\t%08x\n",
I915_READ(GEN8_PCU_IIR));
seq_printf(m, "PCU interrupt enable:\t%08x\n",
I915_READ(GEN8_PCU_IER));
} else if (IS_VALLEYVIEW(dev)) {
seq_printf(m, "Display IER:\t%08x\n",
I915_READ(VLV_IER));
seq_printf(m, "Display IIR:\t%08x\n",
I915_READ(VLV_IIR));
seq_printf(m, "Display IIR_RW:\t%08x\n",
I915_READ(VLV_IIR_RW));
seq_printf(m, "Display IMR:\t%08x\n",
I915_READ(VLV_IMR));
for_each_pipe(dev_priv, pipe)
seq_printf(m, "Pipe %c stat:\t%08x\n",
pipe_name(pipe),
I915_READ(PIPESTAT(pipe)));
seq_printf(m, "Master IER:\t%08x\n",
I915_READ(VLV_MASTER_IER));
seq_printf(m, "Render IER:\t%08x\n",
I915_READ(GTIER));
seq_printf(m, "Render IIR:\t%08x\n",
I915_READ(GTIIR));
seq_printf(m, "Render IMR:\t%08x\n",
I915_READ(GTIMR));
seq_printf(m, "PM IER:\t\t%08x\n",
I915_READ(GEN6_PMIER));
seq_printf(m, "PM IIR:\t\t%08x\n",
I915_READ(GEN6_PMIIR));
seq_printf(m, "PM IMR:\t\t%08x\n",
I915_READ(GEN6_PMIMR));
seq_printf(m, "Port hotplug:\t%08x\n",
I915_READ(PORT_HOTPLUG_EN));
seq_printf(m, "DPFLIPSTAT:\t%08x\n",
I915_READ(VLV_DPFLIPSTAT));
seq_printf(m, "DPINVGTT:\t%08x\n",
I915_READ(DPINVGTT));
} else if (!HAS_PCH_SPLIT(dev)) {
seq_printf(m, "Interrupt enable: %08x\n",
I915_READ(IER));
seq_printf(m, "Interrupt identity: %08x\n",
I915_READ(IIR));
seq_printf(m, "Interrupt mask: %08x\n",
I915_READ(IMR));
for_each_pipe(dev_priv, pipe)
seq_printf(m, "Pipe %c stat: %08x\n",
pipe_name(pipe),
I915_READ(PIPESTAT(pipe)));
} else {
seq_printf(m, "North Display Interrupt enable: %08x\n",
I915_READ(DEIER));
seq_printf(m, "North Display Interrupt identity: %08x\n",
I915_READ(DEIIR));
seq_printf(m, "North Display Interrupt mask: %08x\n",
I915_READ(DEIMR));
seq_printf(m, "South Display Interrupt enable: %08x\n",
I915_READ(SDEIER));
seq_printf(m, "South Display Interrupt identity: %08x\n",
I915_READ(SDEIIR));
seq_printf(m, "South Display Interrupt mask: %08x\n",
I915_READ(SDEIMR));
seq_printf(m, "Graphics Interrupt enable: %08x\n",
I915_READ(GTIER));
seq_printf(m, "Graphics Interrupt identity: %08x\n",
I915_READ(GTIIR));
seq_printf(m, "Graphics Interrupt mask: %08x\n",
I915_READ(GTIMR));
}
for_each_engine(engine, dev_priv) {
if (INTEL_INFO(dev)->gen >= 6) {
seq_printf(m,
"Graphics Interrupt mask (%s): %08x\n",
engine->name, I915_READ_IMR(engine));
}
i915_ring_seqno_info(m, engine);
}
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
return 0;
}
static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
int i, ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
for (i = 0; i < dev_priv->num_fence_regs; i++) {
struct drm_i915_gem_object *obj = dev_priv->fence_regs[i].obj;
drm/i915: Track unbound pages When dealing with a working set larger than the GATT, or even the mappable aperture when touching through the GTT, we end up with evicting objects only to rebind them at a new offset again later. Moving an object into and out of the GTT requires clflushing the pages, thus causing a double-clflush penalty for rebinding. To avoid having to clflush on rebinding, we can track the pages as they are evicted from the GTT and only relinquish those pages on memory pressure. As usual, if it were not for the handling of out-of-memory condition and having to manually shrink our own bo caches, it would be a net reduction of code. Alas. Note: The patch also contains a few changes to the last-hope evict_everything logic in i916_gem_execbuffer.c - we no longer try to only evict the purgeable stuff in a first try (since that's superflous and only helps in OOM corner-cases, not fragmented-gtt trashing situations). Also, the extraction of the get_pages retry loop from bind_to_gtt (and other callsites) to get_pages should imo have been a separate patch. v2: Ditch the newly added put_pages (for unbound objects only) in i915_gem_reset. A quick irc discussion hasn't revealed any important reason for this, so if we need this, I'd like to have a git blame'able explanation for it. v3: Undo the s/drm_malloc_ab/kmalloc/ in get_pages that Chris noticed. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Split out code movements and rant a bit in the commit message with a few Notes. Done v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-08-20 16:40:46 +07:00
seq_printf(m, "Fence %d, pin count = %d, object = ",
i, dev_priv->fence_regs[i].pin_count);
if (obj == NULL)
seq_puts(m, "unused");
else
describe_obj(m, obj);
seq_putc(m, '\n');
}
mutex_unlock(&dev->struct_mutex);
return 0;
}
static int i915_hws_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
const u32 *hws;
int i;
engine = &dev_priv->engine[(uintptr_t)node->info_ent->data];
hws = engine->status_page.page_addr;
if (hws == NULL)
return 0;
for (i = 0; i < 4096 / sizeof(u32) / 4; i += 4) {
seq_printf(m, "0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
i * 4,
hws[i], hws[i + 1], hws[i + 2], hws[i + 3]);
}
return 0;
}
static ssize_t
i915_error_state_write(struct file *filp,
const char __user *ubuf,
size_t cnt,
loff_t *ppos)
{
struct i915_error_state_file_priv *error_priv = filp->private_data;
struct drm_device *dev = error_priv->dev;
int ret;
DRM_DEBUG_DRIVER("Resetting error state\n");
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
i915_destroy_error_state(dev);
mutex_unlock(&dev->struct_mutex);
return cnt;
}
static int i915_error_state_open(struct inode *inode, struct file *file)
{
struct drm_device *dev = inode->i_private;
struct i915_error_state_file_priv *error_priv;
error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL);
if (!error_priv)
return -ENOMEM;
error_priv->dev = dev;
i915_error_state_get(dev, error_priv);
file->private_data = error_priv;
return 0;
}
static int i915_error_state_release(struct inode *inode, struct file *file)
{
struct i915_error_state_file_priv *error_priv = file->private_data;
i915_error_state_put(error_priv);
kfree(error_priv);
return 0;
}
static ssize_t i915_error_state_read(struct file *file, char __user *userbuf,
size_t count, loff_t *pos)
{
struct i915_error_state_file_priv *error_priv = file->private_data;
struct drm_i915_error_state_buf error_str;
loff_t tmp_pos = 0;
ssize_t ret_count = 0;
int ret;
ret = i915_error_state_buf_init(&error_str, to_i915(error_priv->dev), count, *pos);
if (ret)
return ret;
ret = i915_error_state_to_str(&error_str, error_priv);
if (ret)
goto out;
ret_count = simple_read_from_buffer(userbuf, count, &tmp_pos,
error_str.buf,
error_str.bytes);
if (ret_count < 0)
ret = ret_count;
else
*pos = error_str.start + ret_count;
out:
i915_error_state_buf_release(&error_str);
return ret ?: ret_count;
}
static const struct file_operations i915_error_state_fops = {
.owner = THIS_MODULE,
.open = i915_error_state_open,
.read = i915_error_state_read,
.write = i915_error_state_write,
.llseek = default_llseek,
.release = i915_error_state_release,
};
static int
i915_next_seqno_get(void *data, u64 *val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
*val = dev_priv->next_seqno;
mutex_unlock(&dev->struct_mutex);
return 0;
}
static int
i915_next_seqno_set(void *data, u64 val)
{
struct drm_device *dev = data;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
ret = i915_gem_set_seqno(dev, val);
mutex_unlock(&dev->struct_mutex);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
i915_next_seqno_get, i915_next_seqno_set,
"0x%llx\n");
static int i915_frequency_info(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
int ret = 0;
intel_runtime_pm_get(dev_priv);
if (IS_GEN5(dev)) {
u16 rgvswctl = I915_READ16(MEMSWCTL);
u16 rgvstat = I915_READ16(MEMSTAT_ILK);
seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf);
seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f);
seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >>
MEMSTAT_VID_SHIFT);
seq_printf(m, "Current P-state: %d\n",
(rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT);
} else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
u32 freq_sts;
mutex_lock(&dev_priv->rps.hw_lock);
freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq);
seq_printf(m, "actual GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, (freq_sts >> 8) & 0xff));
seq_printf(m, "current GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
seq_printf(m, "max GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
seq_printf(m, "min GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.min_freq));
seq_printf(m, "idle GPU freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
seq_printf(m,
"efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq));
mutex_unlock(&dev_priv->rps.hw_lock);
} else if (INTEL_INFO(dev)->gen >= 6) {
u32 rp_state_limits;
u32 gt_perf_status;
u32 rp_state_cap;
u32 rpmodectl, rpinclimit, rpdeclimit;
u32 rpstat, cagf, reqf;
u32 rpupei, rpcurup, rpprevup;
u32 rpdownei, rpcurdown, rpprevdown;
u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
int max_freq;
rp_state_limits = I915_READ(GEN6_RP_STATE_LIMITS);
if (IS_BROXTON(dev)) {
rp_state_cap = I915_READ(BXT_RP_STATE_CAP);
gt_perf_status = I915_READ(BXT_GT_PERF_STATUS);
} else {
rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
}
/* RPSTAT1 is in the GT power well */
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
goto out;
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
reqf = I915_READ(GEN6_RPNSWREQ);
if (IS_GEN9(dev))
reqf >>= 23;
else {
reqf &= ~GEN6_TURBO_DISABLE;
if (IS_HASWELL(dev) || IS_BROADWELL(dev))
reqf >>= 24;
else
reqf >>= 25;
}
2015-01-24 02:04:26 +07:00
reqf = intel_gpu_freq(dev_priv, reqf);
rpmodectl = I915_READ(GEN6_RP_CONTROL);
rpinclimit = I915_READ(GEN6_RP_UP_THRESHOLD);
rpdeclimit = I915_READ(GEN6_RP_DOWN_THRESHOLD);
rpstat = I915_READ(GEN6_RPSTAT1);
rpupei = I915_READ(GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
rpcurup = I915_READ(GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
rpprevup = I915_READ(GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
rpdownei = I915_READ(GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
rpcurdown = I915_READ(GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
rpprevdown = I915_READ(GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
if (IS_GEN9(dev))
cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
else
cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
2015-01-24 02:04:26 +07:00
cagf = intel_gpu_freq(dev_priv, cagf);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
mutex_unlock(&dev->struct_mutex);
if (IS_GEN6(dev) || IS_GEN7(dev)) {
pm_ier = I915_READ(GEN6_PMIER);
pm_imr = I915_READ(GEN6_PMIMR);
pm_isr = I915_READ(GEN6_PMISR);
pm_iir = I915_READ(GEN6_PMIIR);
pm_mask = I915_READ(GEN6_PMINTRMSK);
} else {
pm_ier = I915_READ(GEN8_GT_IER(2));
pm_imr = I915_READ(GEN8_GT_IMR(2));
pm_isr = I915_READ(GEN8_GT_ISR(2));
pm_iir = I915_READ(GEN8_GT_IIR(2));
pm_mask = I915_READ(GEN6_PMINTRMSK);
}
seq_printf(m, "PM IER=0x%08x IMR=0x%08x ISR=0x%08x IIR=0x%08x, MASK=0x%08x\n",
pm_ier, pm_imr, pm_isr, pm_iir, pm_mask);
seq_printf(m, "pm_intr_keep: 0x%08x\n", dev_priv->rps.pm_intr_keep);
seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
seq_printf(m, "Render p-state ratio: %d\n",
(gt_perf_status & (IS_GEN9(dev) ? 0x1ff00 : 0xff00)) >> 8);
seq_printf(m, "Render p-state VID: %d\n",
gt_perf_status & 0xff);
seq_printf(m, "Render p-state limit: %d\n",
rp_state_limits & 0xff);
seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat);
seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl);
seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit);
seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
seq_printf(m, "CAGF: %dMHz\n", cagf);
seq_printf(m, "RP CUR UP EI: %d (%dus)\n",
rpupei, GT_PM_INTERVAL_TO_US(dev_priv, rpupei));
seq_printf(m, "RP CUR UP: %d (%dus)\n",
rpcurup, GT_PM_INTERVAL_TO_US(dev_priv, rpcurup));
seq_printf(m, "RP PREV UP: %d (%dus)\n",
rpprevup, GT_PM_INTERVAL_TO_US(dev_priv, rpprevup));
seq_printf(m, "Up threshold: %d%%\n",
dev_priv->rps.up_threshold);
seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n",
rpdownei, GT_PM_INTERVAL_TO_US(dev_priv, rpdownei));
seq_printf(m, "RP CUR DOWN: %d (%dus)\n",
rpcurdown, GT_PM_INTERVAL_TO_US(dev_priv, rpcurdown));
seq_printf(m, "RP PREV DOWN: %d (%dus)\n",
rpprevdown, GT_PM_INTERVAL_TO_US(dev_priv, rpprevdown));
seq_printf(m, "Down threshold: %d%%\n",
dev_priv->rps.down_threshold);
max_freq = (IS_BROXTON(dev) ? rp_state_cap >> 0 :
rp_state_cap >> 16) & 0xff;
max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
GEN9_FREQ_SCALER : 1);
seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
2015-01-24 02:04:26 +07:00
intel_gpu_freq(dev_priv, max_freq));
max_freq = (rp_state_cap & 0xff00) >> 8;
max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
GEN9_FREQ_SCALER : 1);
seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
2015-01-24 02:04:26 +07:00
intel_gpu_freq(dev_priv, max_freq));
max_freq = (IS_BROXTON(dev) ? rp_state_cap >> 16 :
rp_state_cap >> 0) & 0xff;
max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
GEN9_FREQ_SCALER : 1);
seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
2015-01-24 02:04:26 +07:00
intel_gpu_freq(dev_priv, max_freq));
seq_printf(m, "Max overclocked frequency: %dMHz\n",
2015-01-24 02:04:26 +07:00
intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
seq_printf(m, "Current freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
seq_printf(m, "Actual freq: %d MHz\n", cagf);
seq_printf(m, "Idle freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
seq_printf(m, "Min freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.min_freq));
seq_printf(m, "Boost freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq));
seq_printf(m, "Max freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
seq_printf(m,
"efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq));
} else {
seq_puts(m, "no P-state info available\n");
}
seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk_freq);
seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq);
seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq);
out:
intel_runtime_pm_put(dev_priv);
return ret;
}
static int i915_hangcheck_info(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
u64 acthd[I915_NUM_ENGINES];
u32 seqno[I915_NUM_ENGINES];
u32 instdone[I915_NUM_INSTDONE_REG];
enum intel_engine_id id;
int j;
if (!i915.enable_hangcheck) {
seq_printf(m, "Hangcheck disabled\n");
return 0;
}
intel_runtime_pm_get(dev_priv);
for_each_engine_id(engine, dev_priv, id) {
acthd[id] = intel_engine_get_active_head(engine);
seqno[id] = intel_engine_get_seqno(engine);
}
i915_get_extra_instdone(dev_priv, instdone);
intel_runtime_pm_put(dev_priv);
if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) {
seq_printf(m, "Hangcheck active, fires in %dms\n",
jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires -
jiffies));
} else
seq_printf(m, "Hangcheck inactive\n");
for_each_engine_id(engine, dev_priv, id) {
seq_printf(m, "%s:\n", engine->name);
seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
engine->hangcheck.seqno,
seqno[id],
engine->last_submitted_seqno);
drm/i915: Slaughter the thundering i915_wait_request herd One particularly stressful scenario consists of many independent tasks all competing for GPU time and waiting upon the results (e.g. realtime transcoding of many, many streams). One bottleneck in particular is that each client waits on its own results, but every client is woken up after every batchbuffer - hence the thunder of hooves as then every client must do its heavyweight dance to read a coherent seqno to see if it is the lucky one. Ideally, we only want one client to wake up after the interrupt and check its request for completion. Since the requests must retire in order, we can select the first client on the oldest request to be woken. Once that client has completed his wait, we can then wake up the next client and so on. However, all clients then incur latency as every process in the chain may be delayed for scheduling - this may also then cause some priority inversion. To reduce the latency, when a client is added or removed from the list, we scan the tree for completed seqno and wake up all the completed waiters in parallel. Using igt/benchmarks/gem_latency, we can demonstrate this effect. The benchmark measures the number of GPU cycles between completion of a batch and the client waking up from a call to wait-ioctl. With many concurrent waiters, with each on a different request, we observe that the wakeup latency before the patch scales nearly linearly with the number of waiters (before external factors kick in making the scaling much worse). After applying the patch, we can see that only the single waiter for the request is being woken up, providing a constant wakeup latency for every operation. However, the situation is not quite as rosy for many waiters on the same request, though to the best of my knowledge this is much less likely in practice. Here, we can observe that the concurrent waiters incur extra latency from being woken up by the solitary bottom-half, rather than directly by the interrupt. This appears to be scheduler induced (having discounted adverse effects from having a rbtree walk/erase in the wakeup path), each additional wake_up_process() costs approximately 1us on big core. Another effect of performing the secondary wakeups from the first bottom-half is the incurred delay this imposes on high priority threads - rather than immediately returning to userspace and leaving the interrupt handler to wake the others. To offset the delay incurred with additional waiters on a request, we could use a hybrid scheme that did a quick read in the interrupt handler and dequeued all the completed waiters (incurring the overhead in the interrupt handler, not the best plan either as we then incur GPU submission latency) but we would still have to wake up the bottom-half every time to do the heavyweight slow read. Or we could only kick the waiters on the seqno with the same priority as the current task (i.e. in the realtime waiter scenario, only it is woken up immediately by the interrupt and simply queues the next waiter before returning to userspace, minimising its delay at the expense of the chain, and also reducing contention on its scheduler runqueue). This is effective at avoid long pauses in the interrupt handler and at avoiding the extra latency in realtime/high-priority waiters. v2: Convert from a kworker per engine into a dedicated kthread for the bottom-half. v3: Rename request members and tweak comments. v4: Use a per-engine spinlock in the breadcrumbs bottom-half. v5: Fix race in locklessly checking waiter status and kicking the task on adding a new waiter. v6: Fix deciding when to force the timer to hide missing interrupts. v7: Move the bottom-half from the kthread to the first client process. v8: Reword a few comments v9: Break the busy loop when the interrupt is unmasked or has fired. v10: Comments, unnecessary churn, better debugging from Tvrtko v11: Wake all completed waiters on removing the current bottom-half to reduce the latency of waking up a herd of clients all waiting on the same request. v12: Rearrange missed-interrupt fault injection so that it works with igt/drv_missed_irq_hang v13: Rename intel_breadcrumb and friends to intel_wait in preparation for signal handling. v14: RCU commentary, assert_spin_locked v15: Hide BUG_ON behind the compiler; report on gem_latency findings. v16: Sort seqno-groups by priority so that first-waiter has the highest task priority (and so avoid priority inversion). v17: Add waiters to post-mortem GPU hang state. v18: Return early for a completed wait after acquiring the spinlock. Avoids adding ourselves to the tree if the is already complete, and skips the awkward question of why we don't do completion wakeups for waits earlier than or equal to ourselves. v19: Prepare for init_breadcrumbs to fail. Later patches may want to allocate during init, so be prepared to propagate back the error code. Testcase: igt/gem_concurrent_blit Testcase: igt/benchmarks/gem_latency Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com> Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Dave Gordon <david.s.gordon@intel.com> Cc: "Goel, Akash" <akash.goel@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> #v18 Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-6-git-send-email-chris@chris-wilson.co.uk
2016-07-01 23:23:15 +07:00
seq_printf(m, "\twaiters? %d\n",
intel_engine_has_waiter(engine));
seq_printf(m, "\tuser interrupts = %lx [current %lx]\n",
engine->hangcheck.user_interrupts,
READ_ONCE(engine->breadcrumbs.irq_wakeups));
seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
(long long)engine->hangcheck.acthd,
(long long)acthd[id]);
seq_printf(m, "\tscore = %d\n", engine->hangcheck.score);
seq_printf(m, "\taction = %d\n", engine->hangcheck.action);
if (engine->id == RCS) {
seq_puts(m, "\tinstdone read =");
for (j = 0; j < I915_NUM_INSTDONE_REG; j++)
seq_printf(m, " 0x%08x", instdone[j]);
seq_puts(m, "\n\tinstdone accu =");
for (j = 0; j < I915_NUM_INSTDONE_REG; j++)
seq_printf(m, " 0x%08x",
engine->hangcheck.instdone[j]);
seq_puts(m, "\n");
}
}
return 0;
}
static int ironlake_drpc_info(struct seq_file *m)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
u32 rgvmodectl, rstdbyctl;
u16 crstandvid;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
rgvmodectl = I915_READ(MEMMODECTL);
rstdbyctl = I915_READ(RSTDBYCTL);
crstandvid = I915_READ16(CRSTANDVID);
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
seq_printf(m, "Boost freq: %d\n",
(rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >>
MEMMODE_BOOST_FREQ_SHIFT);
seq_printf(m, "HW control enabled: %s\n",
yesno(rgvmodectl & MEMMODE_HWIDLE_EN));
seq_printf(m, "SW control enabled: %s\n",
yesno(rgvmodectl & MEMMODE_SWMODE_EN));
seq_printf(m, "Gated voltage change: %s\n",
yesno(rgvmodectl & MEMMODE_RCLK_GATE));
seq_printf(m, "Starting frequency: P%d\n",
(rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT);
seq_printf(m, "Max P-state: P%d\n",
(rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT);
seq_printf(m, "Min P-state: P%d\n", (rgvmodectl & MEMMODE_FMIN_MASK));
seq_printf(m, "RS1 VID: %d\n", (crstandvid & 0x3f));
seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f));
seq_printf(m, "Render standby enabled: %s\n",
yesno(!(rstdbyctl & RCX_SW_EXIT)));
seq_puts(m, "Current RS state: ");
switch (rstdbyctl & RSX_STATUS_MASK) {
case RSX_STATUS_ON:
seq_puts(m, "on\n");
break;
case RSX_STATUS_RC1:
seq_puts(m, "RC1\n");
break;
case RSX_STATUS_RC1E:
seq_puts(m, "RC1E\n");
break;
case RSX_STATUS_RS1:
seq_puts(m, "RS1\n");
break;
case RSX_STATUS_RS2:
seq_puts(m, "RS2 (RC6)\n");
break;
case RSX_STATUS_RS3:
seq_puts(m, "RC3 (RC6+)\n");
break;
default:
seq_puts(m, "unknown\n");
break;
}
return 0;
}
static int i915_forcewake_domains(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_uncore_forcewake_domain *fw_domain;
spin_lock_irq(&dev_priv->uncore.lock);
for_each_fw_domain(fw_domain, dev_priv) {
seq_printf(m, "%s.wake_count = %u\n",
intel_uncore_forcewake_domain_to_str(fw_domain->id),
fw_domain->wake_count);
}
spin_unlock_irq(&dev_priv->uncore.lock);
return 0;
}
static int vlv_drpc_info(struct seq_file *m)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
u32 rpmodectl1, rcctl1, pw_status;
intel_runtime_pm_get(dev_priv);
pw_status = I915_READ(VLV_GTLC_PW_STATUS);
rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
rcctl1 = I915_READ(GEN6_RC_CONTROL);
intel_runtime_pm_put(dev_priv);
seq_printf(m, "Video Turbo Mode: %s\n",
yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO));
seq_printf(m, "Turbo enabled: %s\n",
yesno(rpmodectl1 & GEN6_RP_ENABLE));
seq_printf(m, "HW control enabled: %s\n",
yesno(rpmodectl1 & GEN6_RP_ENABLE));
seq_printf(m, "SW control enabled: %s\n",
yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) ==
GEN6_RP_MEDIA_SW_MODE));
seq_printf(m, "RC6 Enabled: %s\n",
yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE |
GEN6_RC_CTL_EI_MODE(1))));
seq_printf(m, "Render Power Well: %s\n",
(pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? "Up" : "Down");
seq_printf(m, "Media Power Well: %s\n",
(pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down");
seq_printf(m, "Render RC6 residency since boot: %u\n",
I915_READ(VLV_GT_RENDER_RC6));
seq_printf(m, "Media RC6 residency since boot: %u\n",
I915_READ(VLV_GT_MEDIA_RC6));
return i915_forcewake_domains(m, NULL);
}
static int gen6_drpc_info(struct seq_file *m)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0;
u32 gen9_powergate_enable = 0, gen9_powergate_status = 0;
unsigned forcewake_count;
int count = 0, ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
spin_lock_irq(&dev_priv->uncore.lock);
forcewake_count = dev_priv->uncore.fw_domain[FW_DOMAIN_ID_RENDER].wake_count;
spin_unlock_irq(&dev_priv->uncore.lock);
if (forcewake_count) {
seq_puts(m, "RC information inaccurate because somebody "
"holds a forcewake reference \n");
} else {
/* NB: we cannot use forcewake, else we read the wrong values */
while (count++ < 50 && (I915_READ_NOTRACE(FORCEWAKE_ACK) & 1))
udelay(10);
seq_printf(m, "RC information accurate: %s\n", yesno(count < 51));
}
gt_core_status = I915_READ_FW(GEN6_GT_CORE_STATUS);
trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true);
rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
rcctl1 = I915_READ(GEN6_RC_CONTROL);
if (INTEL_INFO(dev)->gen >= 9) {
gen9_powergate_enable = I915_READ(GEN9_PG_ENABLE);
gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS);
}
mutex_unlock(&dev->struct_mutex);
mutex_lock(&dev_priv->rps.hw_lock);
sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
mutex_unlock(&dev_priv->rps.hw_lock);
intel_runtime_pm_put(dev_priv);
seq_printf(m, "Video Turbo Mode: %s\n",
yesno(rpmodectl1 & GEN6_RP_MEDIA_TURBO));
seq_printf(m, "HW control enabled: %s\n",
yesno(rpmodectl1 & GEN6_RP_ENABLE));
seq_printf(m, "SW control enabled: %s\n",
yesno((rpmodectl1 & GEN6_RP_MEDIA_MODE_MASK) ==
GEN6_RP_MEDIA_SW_MODE));
seq_printf(m, "RC1e Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
seq_printf(m, "RC6 Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
if (INTEL_INFO(dev)->gen >= 9) {
seq_printf(m, "Render Well Gating Enabled: %s\n",
yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE));
seq_printf(m, "Media Well Gating Enabled: %s\n",
yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE));
}
seq_printf(m, "Deep RC6 Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE));
seq_printf(m, "Deepest RC6 Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC6pp_ENABLE));
seq_puts(m, "Current RC state: ");
switch (gt_core_status & GEN6_RCn_MASK) {
case GEN6_RC0:
if (gt_core_status & GEN6_CORE_CPD_STATE_MASK)
seq_puts(m, "Core Power Down\n");
else
seq_puts(m, "on\n");
break;
case GEN6_RC3:
seq_puts(m, "RC3\n");
break;
case GEN6_RC6:
seq_puts(m, "RC6\n");
break;
case GEN6_RC7:
seq_puts(m, "RC7\n");
break;
default:
seq_puts(m, "Unknown\n");
break;
}
seq_printf(m, "Core Power Down: %s\n",
yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK));
if (INTEL_INFO(dev)->gen >= 9) {
seq_printf(m, "Render Power Well: %s\n",
(gen9_powergate_status &
GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
seq_printf(m, "Media Power Well: %s\n",
(gen9_powergate_status &
GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
}
/* Not exactly sure what this is */
seq_printf(m, "RC6 \"Locked to RPn\" residency since boot: %u\n",
I915_READ(GEN6_GT_GFX_RC6_LOCKED));
seq_printf(m, "RC6 residency since boot: %u\n",
I915_READ(GEN6_GT_GFX_RC6));
seq_printf(m, "RC6+ residency since boot: %u\n",
I915_READ(GEN6_GT_GFX_RC6p));
seq_printf(m, "RC6++ residency since boot: %u\n",
I915_READ(GEN6_GT_GFX_RC6pp));
seq_printf(m, "RC6 voltage: %dmV\n",
GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff)));
seq_printf(m, "RC6+ voltage: %dmV\n",
GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff)));
seq_printf(m, "RC6++ voltage: %dmV\n",
GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff)));
return i915_forcewake_domains(m, NULL);
}
static int i915_drpc_info(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
return vlv_drpc_info(m);
else if (INTEL_INFO(dev)->gen >= 6)
return gen6_drpc_info(m);
else
return ironlake_drpc_info(m);
}
static int i915_frontbuffer_tracking(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
seq_printf(m, "FB tracking busy bits: 0x%08x\n",
dev_priv->fb_tracking.busy_bits);
seq_printf(m, "FB tracking flip bits: 0x%08x\n",
dev_priv->fb_tracking.flip_bits);
return 0;
}
static int i915_fbc_status(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
if (!HAS_FBC(dev)) {
seq_puts(m, "FBC unsupported on this chipset\n");
return 0;
}
intel_runtime_pm_get(dev_priv);
mutex_lock(&dev_priv->fbc.lock);
if (intel_fbc_is_active(dev_priv))
seq_puts(m, "FBC enabled\n");
else
seq_printf(m, "FBC disabled: %s\n",
dev_priv->fbc.no_fbc_reason);
if (INTEL_INFO(dev_priv)->gen >= 7)
seq_printf(m, "Compressing: %s\n",
yesno(I915_READ(FBC_STATUS2) &
FBC_COMPRESSION_MASK));
mutex_unlock(&dev_priv->fbc.lock);
intel_runtime_pm_put(dev_priv);
return 0;
}
static int i915_fbc_fc_get(void *data, u64 *val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
if (INTEL_INFO(dev)->gen < 7 || !HAS_FBC(dev))
return -ENODEV;
*val = dev_priv->fbc.false_color;
return 0;
}
static int i915_fbc_fc_set(void *data, u64 val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
u32 reg;
if (INTEL_INFO(dev)->gen < 7 || !HAS_FBC(dev))
return -ENODEV;
mutex_lock(&dev_priv->fbc.lock);
reg = I915_READ(ILK_DPFC_CONTROL);
dev_priv->fbc.false_color = val;
I915_WRITE(ILK_DPFC_CONTROL, val ?
(reg | FBC_CTL_FALSE_COLOR) :
(reg & ~FBC_CTL_FALSE_COLOR));
mutex_unlock(&dev_priv->fbc.lock);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_fc_fops,
i915_fbc_fc_get, i915_fbc_fc_set,
"%llu\n");
static int i915_ips_status(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
if (!HAS_IPS(dev)) {
seq_puts(m, "not supported\n");
return 0;
}
intel_runtime_pm_get(dev_priv);
seq_printf(m, "Enabled by kernel parameter: %s\n",
yesno(i915.enable_ips));
if (INTEL_INFO(dev)->gen >= 8) {
seq_puts(m, "Currently: unknown\n");
} else {
if (I915_READ(IPS_CTL) & IPS_ENABLE)
seq_puts(m, "Currently: enabled\n");
else
seq_puts(m, "Currently: disabled\n");
}
intel_runtime_pm_put(dev_priv);
return 0;
}
static int i915_sr_status(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
bool sr_enabled = false;
intel_runtime_pm_get(dev_priv);
if (HAS_PCH_SPLIT(dev))
sr_enabled = I915_READ(WM1_LP_ILK) & WM1_LP_SR_EN;
else if (IS_CRESTLINE(dev) || IS_G4X(dev) ||
IS_I945G(dev) || IS_I945GM(dev))
sr_enabled = I915_READ(FW_BLC_SELF) & FW_BLC_SELF_EN;
else if (IS_I915GM(dev))
sr_enabled = I915_READ(INSTPM) & INSTPM_SELF_EN;
else if (IS_PINEVIEW(dev))
sr_enabled = I915_READ(DSPFW3) & PINEVIEW_SELF_REFRESH_EN;
else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN;
intel_runtime_pm_put(dev_priv);
seq_printf(m, "self-refresh: %s\n",
sr_enabled ? "enabled" : "disabled");
return 0;
}
static int i915_emon_status(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
unsigned long temp, chipset, gfx;
int ret;
if (!IS_GEN5(dev))
return -ENODEV;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
temp = i915_mch_val(dev_priv);
chipset = i915_chipset_val(dev_priv);
gfx = i915_gfx_val(dev_priv);
mutex_unlock(&dev->struct_mutex);
seq_printf(m, "GMCH temp: %ld\n", temp);
seq_printf(m, "Chipset power: %ld\n", chipset);
seq_printf(m, "GFX power: %ld\n", gfx);
seq_printf(m, "Total power: %ld\n", chipset + gfx);
return 0;
}
static int i915_ring_freq_table(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
int ret = 0;
int gpu_freq, ia_freq;
unsigned int max_gpu_freq, min_gpu_freq;
if (!HAS_CORE_RING_FREQ(dev)) {
seq_puts(m, "unsupported on this chipset\n");
return 0;
}
intel_runtime_pm_get(dev_priv);
ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
if (ret)
goto out;
if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
/* Convert GT frequency to 50 HZ units */
min_gpu_freq =
dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER;
max_gpu_freq =
dev_priv->rps.max_freq_softlimit / GEN9_FREQ_SCALER;
} else {
min_gpu_freq = dev_priv->rps.min_freq_softlimit;
max_gpu_freq = dev_priv->rps.max_freq_softlimit;
}
seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
ia_freq = gpu_freq;
sandybridge_pcode_read(dev_priv,
GEN6_PCODE_READ_MIN_FREQ_TABLE,
&ia_freq);
seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
intel_gpu_freq(dev_priv, (gpu_freq *
(IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
GEN9_FREQ_SCALER : 1))),
((ia_freq >> 0) & 0xff) * 100,
((ia_freq >> 8) & 0xff) * 100);
}
mutex_unlock(&dev_priv->rps.hw_lock);
out:
intel_runtime_pm_put(dev_priv);
return ret;
}
static int i915_opregion(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_opregion *opregion = &dev_priv->opregion;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
goto out;
if (opregion->header)
seq_write(m, opregion->header, OPREGION_SIZE);
mutex_unlock(&dev->struct_mutex);
out:
return 0;
}
static int i915_vbt(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_opregion *opregion = &dev_priv->opregion;
if (opregion->vbt)
seq_write(m, opregion->vbt, opregion->vbt_size);
return 0;
}
static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct intel_framebuffer *fbdev_fb = NULL;
struct drm_framebuffer *drm_fb;
drm/i915: Add struct_mutex locking for debugs/i915_gem_framebuffer Since describe_obj() looks at state guarded by the struct_mutex, we need to be holding it. [ 580.201054] drv_suspend: starting subtest debugfs-reader [ 580.239652] ------------[ cut here ]------------ [ 580.239696] WARNING: CPU: 0 PID: 920 at include/linux/list_check.h:25 describe_obj+0x419/0x440() [ 580.239725] CPU: 0 PID: 920 Comm: cat Not tainted 4.5.0-rc6+ #835 [ 580.239745] Hardware name: /NUC5CPYB, BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 580.239767] 0000000000000000 ffff88027554fcf8 ffffffff812c1135 0000000000000000 [ 580.239815] ffffffff8193dc42 ffff88027554fd30 ffffffff8107419d ffff880071727c00 [ 580.239858] ffff8802757d8000 ffffffff818f693c ffffffff818f693c ffff8802757b9048 [ 580.239896] Call Trace: [ 580.239917] [<ffffffff812c1135>] dump_stack+0x67/0x92 [ 580.239939] [<ffffffff8107419d>] warn_slowpath_common+0x7d/0xb0 [ 580.239959] [<ffffffff810742ba>] warn_slowpath_null+0x1a/0x20 [ 580.239981] [<ffffffff813ce579>] describe_obj+0x419/0x440 [ 580.240006] [<ffffffff813ced22>] i915_gem_framebuffer_info+0xa2/0x100 [ 580.240033] [<ffffffff811a9286>] seq_read+0xe6/0x3b0 [ 580.240059] [<ffffffff81182288>] __vfs_read+0x28/0xd0 [ 580.240085] [<ffffffff81173378>] ? SyS_fadvise64+0x228/0x2c0 [ 580.240112] [<ffffffff811823b2>] vfs_read+0x82/0x110 [ 580.240137] [<ffffffff811827d9>] SyS_read+0x49/0xa0 [ 580.240162] [<ffffffff815bac57>] entry_SYSCALL_64_fastpath+0x12/0x6b [ 580.240187] ---[ end trace 3e2cbf34576c9878 ]--- [ 580.281900] ------------[ cut here ]------------ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/1459689261-7920-1-git-send-email-chris@chris-wilson.co.uk
2016-04-03 20:14:20 +07:00
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
#ifdef CONFIG_DRM_FBDEV_EMULATION
if (to_i915(dev)->fbdev) {
fbdev_fb = to_intel_framebuffer(to_i915(dev)->fbdev->helper.fb);
seq_printf(m, "fbcon size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ",
fbdev_fb->base.width,
fbdev_fb->base.height,
fbdev_fb->base.depth,
fbdev_fb->base.bits_per_pixel,
fbdev_fb->base.modifier[0],
drm_framebuffer_read_refcount(&fbdev_fb->base));
describe_obj(m, fbdev_fb->obj);
seq_putc(m, '\n');
}
#endif
drm: revamp locking around fb creation/destruction Well, at least step 1. The goal here is that framebuffer objects can survive outside of the mode_config lock, with just a reference held as protection. The first step to get there is to introduce a special fb_lock which protects fb lookup, creation and destruction, to make them appear atomic. This new fb_lock can nest within the mode_config lock. But the idea is (once the reference counting part is completed) that we only quickly take that fb_lock to lookup a framebuffer and grab a reference, without any other locks involved. vmwgfx is the only driver which does framebuffer lookups itself, also wrap those calls to drm_mode_object_find with the new lock. Also protect the fb_list walking in i915 and omapdrm with the new lock. As a slight complication there's also the list of user-created fbs attached to the file private. The problem now is that at fclose() time we need to walk that list, eventually do a modeset call to remove the fb from active usage (and are required to be able to take the mode_config lock), but in the end we need to grab the new fb_lock to remove the fb from the list. The easiest solution is to add another mutex to protect this per-file list. Currently that new fbs_lock nests within the modeset locks and so appears redudant. But later patches will switch around this sequence so that taking the modeset locks in the fb destruction path is optional in the fastpath. Ultimately the goal is that addfb and rmfb do not require the mode_config lock, since otherwise they have the potential to introduce stalls in the pageflip sequence of a compositor (if the compositor e.g. switches to a fullscreen client or if it enables a plane). But that requires a few more steps and hoops to jump through. Note that framebuffer creation/destruction is now double-protected - once by the fb_lock and in parts by the idr_lock. The later would be unnecessariy if framebuffers would have their own idr allocator. But that's material for another patch (series). v2: Properly initialize the fb->filp_head list in _init, otherwise the newly added WARN to check whether the fb isn't on a fpriv list any more will fail for driver-private objects. v3: Fixup two error-case unlock bugs spotted by Richard Wilbur. Reviewed-by: Rob Clark <rob@ti.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-12-11 03:19:18 +07:00
mutex_lock(&dev->mode_config.fb_lock);
drm_for_each_fb(drm_fb, dev) {
struct intel_framebuffer *fb = to_intel_framebuffer(drm_fb);
if (fb == fbdev_fb)
continue;
seq_printf(m, "user size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ",
fb->base.width,
fb->base.height,
fb->base.depth,
fb->base.bits_per_pixel,
fb->base.modifier[0],
drm_framebuffer_read_refcount(&fb->base));
describe_obj(m, fb->obj);
seq_putc(m, '\n');
}
drm: revamp locking around fb creation/destruction Well, at least step 1. The goal here is that framebuffer objects can survive outside of the mode_config lock, with just a reference held as protection. The first step to get there is to introduce a special fb_lock which protects fb lookup, creation and destruction, to make them appear atomic. This new fb_lock can nest within the mode_config lock. But the idea is (once the reference counting part is completed) that we only quickly take that fb_lock to lookup a framebuffer and grab a reference, without any other locks involved. vmwgfx is the only driver which does framebuffer lookups itself, also wrap those calls to drm_mode_object_find with the new lock. Also protect the fb_list walking in i915 and omapdrm with the new lock. As a slight complication there's also the list of user-created fbs attached to the file private. The problem now is that at fclose() time we need to walk that list, eventually do a modeset call to remove the fb from active usage (and are required to be able to take the mode_config lock), but in the end we need to grab the new fb_lock to remove the fb from the list. The easiest solution is to add another mutex to protect this per-file list. Currently that new fbs_lock nests within the modeset locks and so appears redudant. But later patches will switch around this sequence so that taking the modeset locks in the fb destruction path is optional in the fastpath. Ultimately the goal is that addfb and rmfb do not require the mode_config lock, since otherwise they have the potential to introduce stalls in the pageflip sequence of a compositor (if the compositor e.g. switches to a fullscreen client or if it enables a plane). But that requires a few more steps and hoops to jump through. Note that framebuffer creation/destruction is now double-protected - once by the fb_lock and in parts by the idr_lock. The later would be unnecessariy if framebuffers would have their own idr allocator. But that's material for another patch (series). v2: Properly initialize the fb->filp_head list in _init, otherwise the newly added WARN to check whether the fb isn't on a fpriv list any more will fail for driver-private objects. v3: Fixup two error-case unlock bugs spotted by Richard Wilbur. Reviewed-by: Rob Clark <rob@ti.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-12-11 03:19:18 +07:00
mutex_unlock(&dev->mode_config.fb_lock);
drm/i915: Add struct_mutex locking for debugs/i915_gem_framebuffer Since describe_obj() looks at state guarded by the struct_mutex, we need to be holding it. [ 580.201054] drv_suspend: starting subtest debugfs-reader [ 580.239652] ------------[ cut here ]------------ [ 580.239696] WARNING: CPU: 0 PID: 920 at include/linux/list_check.h:25 describe_obj+0x419/0x440() [ 580.239725] CPU: 0 PID: 920 Comm: cat Not tainted 4.5.0-rc6+ #835 [ 580.239745] Hardware name: /NUC5CPYB, BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 580.239767] 0000000000000000 ffff88027554fcf8 ffffffff812c1135 0000000000000000 [ 580.239815] ffffffff8193dc42 ffff88027554fd30 ffffffff8107419d ffff880071727c00 [ 580.239858] ffff8802757d8000 ffffffff818f693c ffffffff818f693c ffff8802757b9048 [ 580.239896] Call Trace: [ 580.239917] [<ffffffff812c1135>] dump_stack+0x67/0x92 [ 580.239939] [<ffffffff8107419d>] warn_slowpath_common+0x7d/0xb0 [ 580.239959] [<ffffffff810742ba>] warn_slowpath_null+0x1a/0x20 [ 580.239981] [<ffffffff813ce579>] describe_obj+0x419/0x440 [ 580.240006] [<ffffffff813ced22>] i915_gem_framebuffer_info+0xa2/0x100 [ 580.240033] [<ffffffff811a9286>] seq_read+0xe6/0x3b0 [ 580.240059] [<ffffffff81182288>] __vfs_read+0x28/0xd0 [ 580.240085] [<ffffffff81173378>] ? SyS_fadvise64+0x228/0x2c0 [ 580.240112] [<ffffffff811823b2>] vfs_read+0x82/0x110 [ 580.240137] [<ffffffff811827d9>] SyS_read+0x49/0xa0 [ 580.240162] [<ffffffff815bac57>] entry_SYSCALL_64_fastpath+0x12/0x6b [ 580.240187] ---[ end trace 3e2cbf34576c9878 ]--- [ 580.281900] ------------[ cut here ]------------ Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/1459689261-7920-1-git-send-email-chris@chris-wilson.co.uk
2016-04-03 20:14:20 +07:00
mutex_unlock(&dev->struct_mutex);
return 0;
}
static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
{
seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)",
ring->space, ring->head, ring->tail,
ring->last_retired_head);
}
static int i915_context_status(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
struct i915_gem_context *ctx;
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
list_for_each_entry(ctx, &dev_priv->context_list, link) {
seq_printf(m, "HW context %u ", ctx->hw_id);
if (IS_ERR(ctx->file_priv)) {
seq_puts(m, "(deleted) ");
} else if (ctx->file_priv) {
struct pid *pid = ctx->file_priv->file->pid;
struct task_struct *task;
task = get_pid_task(pid, PIDTYPE_PID);
if (task) {
seq_printf(m, "(%s [%d]) ",
task->comm, task->pid);
put_task_struct(task);
}
} else {
seq_puts(m, "(kernel) ");
}
seq_putc(m, ctx->remap_slice ? 'R' : 'r');
seq_putc(m, '\n');
for_each_engine(engine, dev_priv) {
struct intel_context *ce = &ctx->engine[engine->id];
seq_printf(m, "%s: ", engine->name);
seq_putc(m, ce->initialised ? 'I' : 'i');
if (ce->state)
describe_obj(m, ce->state);
if (ce->ring)
describe_ctx_ring(m, ce->ring);
seq_putc(m, '\n');
}
seq_putc(m, '\n');
}
mutex_unlock(&dev->struct_mutex);
return 0;
}
static void i915_dump_lrc_obj(struct seq_file *m,
struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
struct page *page;
uint32_t *reg_state;
int j;
unsigned long ggtt_offset = 0;
seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id);
if (ctx_obj == NULL) {
seq_puts(m, "\tNot allocated\n");
return;
}
if (!i915_gem_obj_ggtt_bound(ctx_obj))
seq_puts(m, "\tNot bound in GGTT\n");
else
ggtt_offset = i915_gem_obj_ggtt_offset(ctx_obj);
if (i915_gem_object_get_pages(ctx_obj)) {
seq_puts(m, "\tFailed to get pages for context object\n");
return;
}
drm/i915: Integrate GuC-based command submission GuC-based submission is mostly the same as execlist mode, up to intel_logical_ring_advance_and_submit(), where the context being dispatched would be added to the execlist queue; at this point we submit the context to the GuC backend instead. There are, however, a few other changes also required, notably: 1. Contexts must be pinned at GGTT addresses accessible by the GuC i.e. NOT in the range [0..WOPCM_SIZE), so we have to add the PIN_OFFSET_BIAS flag to the relevant GGTT-pinning calls. 2. The GuC's TLB must be invalidated after a context is pinned at a new GGTT address. 3. GuC firmware uses the one page before Ring Context as shared data. Therefore, whenever driver wants to get base address of LRC, we will offset one page for it. LRC_PPHWSP_PN is defined as the page number of LRCA. 4. In the work queue used to pass requests to the GuC, the GuC firmware requires the ring-tail-offset to be represented as an 11-bit value, expressed in QWords. Therefore, the ringbuffer size must be reduced to the representable range (4 pages). v2: Defer adding #defines until needed [Chris Wilson] Rationalise type declarations [Chris Wilson] v4: Squashed kerneldoc patch into here [Daniel Vetter] v5: Update request->tail in code common to both GuC and execlist modes. Add a private version of lr_context_update(), as sharing the execlist version leads to race conditions when the CPU and the GuC both update TAIL in the context image. Conversion of error-captured HWS page to string must account for offset from start of object to actual HWS (LRC_PPHWSP_PN). Issue: VIZ-4884 Signed-off-by: Alex Dai <yu.dai@intel.com> Signed-off-by: Dave Gordon <david.s.gordon@intel.com> Reviewed-by: Tom O'Rourke <Tom.O'Rourke@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-08-12 21:43:43 +07:00
page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
if (!WARN_ON(page == NULL)) {
reg_state = kmap_atomic(page);
for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) {
seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n",
ggtt_offset + 4096 + (j * 4),
reg_state[j], reg_state[j + 1],
reg_state[j + 2], reg_state[j + 3]);
}
kunmap_atomic(reg_state);
}
seq_putc(m, '\n');
}
static int i915_dump_lrc(struct seq_file *m, void *unused)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
struct i915_gem_context *ctx;
int ret;
if (!i915.enable_execlists) {
seq_printf(m, "Logical Ring Contexts are disabled\n");
return 0;
}
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
list_for_each_entry(ctx, &dev_priv->context_list, link)
for_each_engine(engine, dev_priv)
i915_dump_lrc_obj(m, ctx, engine);
mutex_unlock(&dev->struct_mutex);
return 0;
}
static int i915_execlists(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
u32 status_pointer;
u8 read_pointer;
u8 write_pointer;
u32 status;
u32 ctx_id;
struct list_head *cursor;
int i, ret;
if (!i915.enable_execlists) {
seq_puts(m, "Logical Ring Contexts are disabled\n");
return 0;
}
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
for_each_engine(engine, dev_priv) {
struct drm_i915_gem_request *head_req = NULL;
int count = 0;
seq_printf(m, "%s\n", engine->name);
status = I915_READ(RING_EXECLIST_STATUS_LO(engine));
ctx_id = I915_READ(RING_EXECLIST_STATUS_HI(engine));
seq_printf(m, "\tExeclist status: 0x%08X, context: %u\n",
status, ctx_id);
status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer);
read_pointer = engine->next_context_status_buffer;
write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
if (read_pointer > write_pointer)
write_pointer += GEN8_CSB_ENTRIES;
seq_printf(m, "\tRead pointer: 0x%08X, write pointer 0x%08X\n",
read_pointer, write_pointer);
for (i = 0; i < GEN8_CSB_ENTRIES; i++) {
status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, i));
ctx_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, i));
seq_printf(m, "\tStatus buffer %d: 0x%08X, context: %u\n",
i, status, ctx_id);
}
drm/i915: Move execlists irq handler to a bottom half Doing a lot of work in the interrupt handler introduces huge latencies to the system as a whole. Most dramatic effect can be seen by running an all engine stress test like igt/gem_exec_nop/all where, when the kernel config is lean enough, the whole system can be brought into multi-second periods of complete non-interactivty. That can look for example like this: NMI watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/u8:3:143] Modules linked in: [redacted for brevity] CPU: 0 PID: 143 Comm: kworker/u8:3 Tainted: G U L 4.5.0-160321+ #183 Hardware name: Intel Corporation Broadwell Client platform/WhiteTip Mountain 1 Workqueue: i915 gen6_pm_rps_work [i915] task: ffff8800aae88000 ti: ffff8800aae90000 task.ti: ffff8800aae90000 RIP: 0010:[<ffffffff8104a3c2>] [<ffffffff8104a3c2>] __do_softirq+0x72/0x1d0 RSP: 0000:ffff88014f403f38 EFLAGS: 00000206 RAX: ffff8800aae94000 RBX: 0000000000000000 RCX: 00000000000006e0 RDX: 0000000000000020 RSI: 0000000004208060 RDI: 0000000000215d80 RBP: ffff88014f403f80 R08: 0000000b1b42c180 R09: 0000000000000022 R10: 0000000000000004 R11: 00000000ffffffff R12: 000000000000a030 R13: 0000000000000082 R14: ffff8800aa4d0080 R15: 0000000000000082 FS: 0000000000000000(0000) GS:ffff88014f400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa53b90c000 CR3: 0000000001a0a000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Stack: 042080601b33869f ffff8800aae94000 00000000fffc2678 ffff88010000000a 0000000000000000 000000000000a030 0000000000005302 ffff8800aa4d0080 0000000000000206 ffff88014f403f90 ffffffff8104a716 ffff88014f403fa8 Call Trace: <IRQ> [<ffffffff8104a716>] irq_exit+0x86/0x90 [<ffffffff81031e7d>] smp_apic_timer_interrupt+0x3d/0x50 [<ffffffff814f3eac>] apic_timer_interrupt+0x7c/0x90 <EOI> [<ffffffffa01c5b40>] ? gen8_write64+0x1a0/0x1a0 [i915] [<ffffffff814f2b39>] ? _raw_spin_unlock_irqrestore+0x9/0x20 [<ffffffffa01c5c44>] gen8_write32+0x104/0x1a0 [i915] [<ffffffff8132c6a2>] ? n_tty_receive_buf_common+0x372/0xae0 [<ffffffffa017cc9e>] gen6_set_rps_thresholds+0x1be/0x330 [i915] [<ffffffffa017eaf0>] gen6_set_rps+0x70/0x200 [i915] [<ffffffffa0185375>] intel_set_rps+0x25/0x30 [i915] [<ffffffffa01768fd>] gen6_pm_rps_work+0x10d/0x2e0 [i915] [<ffffffff81063852>] ? finish_task_switch+0x72/0x1c0 [<ffffffff8105ab29>] process_one_work+0x139/0x350 [<ffffffff8105b186>] worker_thread+0x126/0x490 [<ffffffff8105b060>] ? rescuer_thread+0x320/0x320 [<ffffffff8105fa64>] kthread+0xc4/0xe0 [<ffffffff8105f9a0>] ? kthread_create_on_node+0x170/0x170 [<ffffffff814f351f>] ret_from_fork+0x3f/0x70 [<ffffffff8105f9a0>] ? kthread_create_on_node+0x170/0x170 I could not explain, or find a code path, which would explain a +20 second lockup, but from some instrumentation it was apparent the interrupts off proportion of time was between 10-25% under heavy load which is quite bad. When a interrupt "cliff" is reached, which was >~320k irq/s on my machine, the whole system goes into a terrible state of the above described multi-second lockups. By moving the GT interrupt handling to a tasklet in a most simple way, the problem above disappears completely. Testing the effect on sytem-wide latencies using igt/gem_syslatency shows the following before this patch: gem_syslatency: cycles=1532739, latency mean=416531.829us max=2499237us gem_syslatency: cycles=1839434, latency mean=1458099.157us max=4998944us gem_syslatency: cycles=1432570, latency mean=2688.451us max=1201185us gem_syslatency: cycles=1533543, latency mean=416520.499us max=2498886us This shows that the unrelated process is experiencing huge delays in its wake-up latency. After the patch the results look like this: gem_syslatency: cycles=808907, latency mean=53.133us max=1640us gem_syslatency: cycles=862154, latency mean=62.778us max=2117us gem_syslatency: cycles=856039, latency mean=58.079us max=2123us gem_syslatency: cycles=841683, latency mean=56.914us max=1667us Showing a huge improvement in the unrelated process wake-up latency. It also shows an approximate halving in the number of total empty batches submitted during the test. This may not be worrying since the test puts the driver under a very unrealistic load with ncpu threads doing empty batch submission to all GPU engines each. Another benefit compared to the hard-irq handling is that now work on all engines can be dispatched in parallel since we can have up to number of CPUs active tasklets. (While previously a single hard-irq would serially dispatch on one engine after another.) More interesting scenario with regards to throughput is "gem_latency -n 100" which shows 25% better throughput and CPU usage, and 14% better dispatch latencies. I did not find any gains or regressions with Synmark2 or GLbench under light testing. More benchmarking is certainly required. v2: * execlists_lock should be taken as spin_lock_bh when queuing work from userspace now. (Chris Wilson) * uncore.lock must be taken with spin_lock_irq when submitting requests since that now runs from either softirq or process context. v3: * Expanded commit message with more testing data; * converted missed locking sites to _bh; * added execlist_lock comment. (Chris Wilson) v4: * Mention dispatch parallelism in commit. (Chris Wilson) * Do not hold uncore.lock over MMIO reads since the block is already serialised per-engine via the tasklet itself. (Chris Wilson) * intel_lrc_irq_handler should be static. (Chris Wilson) * Cancel/sync the tasklet on GPU reset. (Chris Wilson) * Document and WARN that tasklet cannot be active/pending on engine cleanup. (Chris Wilson/Imre Deak) Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Imre Deak <imre.deak@intel.com> Testcase: igt/gem_exec_nop/all Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94350 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: http://patchwork.freedesktop.org/patch/msgid/1459768316-6670-1-git-send-email-tvrtko.ursulin@linux.intel.com
2016-04-04 18:11:56 +07:00
spin_lock_bh(&engine->execlist_lock);
list_for_each(cursor, &engine->execlist_queue)
count++;
head_req = list_first_entry_or_null(&engine->execlist_queue,
struct drm_i915_gem_request,
execlist_link);
drm/i915: Move execlists irq handler to a bottom half Doing a lot of work in the interrupt handler introduces huge latencies to the system as a whole. Most dramatic effect can be seen by running an all engine stress test like igt/gem_exec_nop/all where, when the kernel config is lean enough, the whole system can be brought into multi-second periods of complete non-interactivty. That can look for example like this: NMI watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [kworker/u8:3:143] Modules linked in: [redacted for brevity] CPU: 0 PID: 143 Comm: kworker/u8:3 Tainted: G U L 4.5.0-160321+ #183 Hardware name: Intel Corporation Broadwell Client platform/WhiteTip Mountain 1 Workqueue: i915 gen6_pm_rps_work [i915] task: ffff8800aae88000 ti: ffff8800aae90000 task.ti: ffff8800aae90000 RIP: 0010:[<ffffffff8104a3c2>] [<ffffffff8104a3c2>] __do_softirq+0x72/0x1d0 RSP: 0000:ffff88014f403f38 EFLAGS: 00000206 RAX: ffff8800aae94000 RBX: 0000000000000000 RCX: 00000000000006e0 RDX: 0000000000000020 RSI: 0000000004208060 RDI: 0000000000215d80 RBP: ffff88014f403f80 R08: 0000000b1b42c180 R09: 0000000000000022 R10: 0000000000000004 R11: 00000000ffffffff R12: 000000000000a030 R13: 0000000000000082 R14: ffff8800aa4d0080 R15: 0000000000000082 FS: 0000000000000000(0000) GS:ffff88014f400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa53b90c000 CR3: 0000000001a0a000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Stack: 042080601b33869f ffff8800aae94000 00000000fffc2678 ffff88010000000a 0000000000000000 000000000000a030 0000000000005302 ffff8800aa4d0080 0000000000000206 ffff88014f403f90 ffffffff8104a716 ffff88014f403fa8 Call Trace: <IRQ> [<ffffffff8104a716>] irq_exit+0x86/0x90 [<ffffffff81031e7d>] smp_apic_timer_interrupt+0x3d/0x50 [<ffffffff814f3eac>] apic_timer_interrupt+0x7c/0x90 <EOI> [<ffffffffa01c5b40>] ? gen8_write64+0x1a0/0x1a0 [i915] [<ffffffff814f2b39>] ? _raw_spin_unlock_irqrestore+0x9/0x20 [<ffffffffa01c5c44>] gen8_write32+0x104/0x1a0 [i915] [<ffffffff8132c6a2>] ? n_tty_receive_buf_common+0x372/0xae0 [<ffffffffa017cc9e>] gen6_set_rps_thresholds+0x1be/0x330 [i915] [<ffffffffa017eaf0>] gen6_set_rps+0x70/0x200 [i915] [<ffffffffa0185375>] intel_set_rps+0x25/0x30 [i915] [<ffffffffa01768fd>] gen6_pm_rps_work+0x10d/0x2e0 [i915] [<ffffffff81063852>] ? finish_task_switch+0x72/0x1c0 [<ffffffff8105ab29>] process_one_work+0x139/0x350 [<ffffffff8105b186>] worker_thread+0x126/0x490 [<ffffffff8105b060>] ? rescuer_thread+0x320/0x320 [<ffffffff8105fa64>] kthread+0xc4/0xe0 [<ffffffff8105f9a0>] ? kthread_create_on_node+0x170/0x170 [<ffffffff814f351f>] ret_from_fork+0x3f/0x70 [<ffffffff8105f9a0>] ? kthread_create_on_node+0x170/0x170 I could not explain, or find a code path, which would explain a +20 second lockup, but from some instrumentation it was apparent the interrupts off proportion of time was between 10-25% under heavy load which is quite bad. When a interrupt "cliff" is reached, which was >~320k irq/s on my machine, the whole system goes into a terrible state of the above described multi-second lockups. By moving the GT interrupt handling to a tasklet in a most simple way, the problem above disappears completely. Testing the effect on sytem-wide latencies using igt/gem_syslatency shows the following before this patch: gem_syslatency: cycles=1532739, latency mean=416531.829us max=2499237us gem_syslatency: cycles=1839434, latency mean=1458099.157us max=4998944us gem_syslatency: cycles=1432570, latency mean=2688.451us max=1201185us gem_syslatency: cycles=1533543, latency mean=416520.499us max=2498886us This shows that the unrelated process is experiencing huge delays in its wake-up latency. After the patch the results look like this: gem_syslatency: cycles=808907, latency mean=53.133us max=1640us gem_syslatency: cycles=862154, latency mean=62.778us max=2117us gem_syslatency: cycles=856039, latency mean=58.079us max=2123us gem_syslatency: cycles=841683, latency mean=56.914us max=1667us Showing a huge improvement in the unrelated process wake-up latency. It also shows an approximate halving in the number of total empty batches submitted during the test. This may not be worrying since the test puts the driver under a very unrealistic load with ncpu threads doing empty batch submission to all GPU engines each. Another benefit compared to the hard-irq handling is that now work on all engines can be dispatched in parallel since we can have up to number of CPUs active tasklets. (While previously a single hard-irq would serially dispatch on one engine after another.) More interesting scenario with regards to throughput is "gem_latency -n 100" which shows 25% better throughput and CPU usage, and 14% better dispatch latencies. I did not find any gains or regressions with Synmark2 or GLbench under light testing. More benchmarking is certainly required. v2: * execlists_lock should be taken as spin_lock_bh when queuing work from userspace now. (Chris Wilson) * uncore.lock must be taken with spin_lock_irq when submitting requests since that now runs from either softirq or process context. v3: * Expanded commit message with more testing data; * converted missed locking sites to _bh; * added execlist_lock comment. (Chris Wilson) v4: * Mention dispatch parallelism in commit. (Chris Wilson) * Do not hold uncore.lock over MMIO reads since the block is already serialised per-engine via the tasklet itself. (Chris Wilson) * intel_lrc_irq_handler should be static. (Chris Wilson) * Cancel/sync the tasklet on GPU reset. (Chris Wilson) * Document and WARN that tasklet cannot be active/pending on engine cleanup. (Chris Wilson/Imre Deak) Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Imre Deak <imre.deak@intel.com> Testcase: igt/gem_exec_nop/all Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94350 Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: http://patchwork.freedesktop.org/patch/msgid/1459768316-6670-1-git-send-email-tvrtko.ursulin@linux.intel.com
2016-04-04 18:11:56 +07:00
spin_unlock_bh(&engine->execlist_lock);
seq_printf(m, "\t%d requests in queue\n", count);
if (head_req) {
seq_printf(m, "\tHead request context: %u\n",
head_req->ctx->hw_id);
seq_printf(m, "\tHead request tail: %u\n",
head_req->tail);
}
seq_putc(m, '\n');
}
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
return 0;
}
static const char *swizzle_string(unsigned swizzle)
{
switch (swizzle) {
case I915_BIT_6_SWIZZLE_NONE:
return "none";
case I915_BIT_6_SWIZZLE_9:
return "bit9";
case I915_BIT_6_SWIZZLE_9_10:
return "bit9/bit10";
case I915_BIT_6_SWIZZLE_9_11:
return "bit9/bit11";
case I915_BIT_6_SWIZZLE_9_10_11:
return "bit9/bit10/bit11";
case I915_BIT_6_SWIZZLE_9_17:
return "bit9/bit17";
case I915_BIT_6_SWIZZLE_9_10_17:
return "bit9/bit10/bit17";
case I915_BIT_6_SWIZZLE_UNKNOWN:
return "unknown";
}
return "bug";
}
static int i915_swizzle_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
swizzle_string(dev_priv->mm.bit_6_swizzle_x));
seq_printf(m, "bit6 swizzle for Y-tiling = %s\n",
swizzle_string(dev_priv->mm.bit_6_swizzle_y));
if (IS_GEN3(dev) || IS_GEN4(dev)) {
seq_printf(m, "DDC = 0x%08x\n",
I915_READ(DCC));
seq_printf(m, "DDC2 = 0x%08x\n",
I915_READ(DCC2));
seq_printf(m, "C0DRB3 = 0x%04x\n",
I915_READ16(C0DRB3));
seq_printf(m, "C1DRB3 = 0x%04x\n",
I915_READ16(C1DRB3));
} else if (INTEL_INFO(dev)->gen >= 6) {
seq_printf(m, "MAD_DIMM_C0 = 0x%08x\n",
I915_READ(MAD_DIMM_C0));
seq_printf(m, "MAD_DIMM_C1 = 0x%08x\n",
I915_READ(MAD_DIMM_C1));
seq_printf(m, "MAD_DIMM_C2 = 0x%08x\n",
I915_READ(MAD_DIMM_C2));
seq_printf(m, "TILECTL = 0x%08x\n",
I915_READ(TILECTL));
if (INTEL_INFO(dev)->gen >= 8)
seq_printf(m, "GAMTARBMODE = 0x%08x\n",
I915_READ(GAMTARBMODE));
else
seq_printf(m, "ARB_MODE = 0x%08x\n",
I915_READ(ARB_MODE));
seq_printf(m, "DISP_ARB_CTL = 0x%08x\n",
I915_READ(DISP_ARB_CTL));
}
if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
seq_puts(m, "L-shaped memory detected\n");
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
return 0;
}
static int per_file_ctx(int id, void *ptr, void *data)
{
struct i915_gem_context *ctx = ptr;
struct seq_file *m = data;
struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
if (!ppgtt) {
seq_printf(m, " no ppgtt for context %d\n",
ctx->user_handle);
return 0;
}
if (i915_gem_context_is_default(ctx))
seq_puts(m, " default context:\n");
else
drm/i915: Emphasize that ctx->id is merely a user handle This is an Execlists preparatory patch, since they make context ID become an overloaded term: - In the software, it was used to distinguish which context userspace was trying to use. - In the BSpec, the term is used to describe the 20-bits long field the hardware uses to it to discriminate the contexts that are submitted to the ELSP and inform the driver about their current status (via Context Switch Interrupts and Context Status Buffers). Initially, I tried to make the different meanings converge, but it proved impossible: - The software ctx->id is per-filp, while the hardware one needs to be globally unique. - Also, we multiplex several backing states objects per intel_context, and all of them need unique HW IDs. - I tried adding a per-filp ID and then composing the HW context ID as: ctx->id + file_priv->id + ring->id, but the fact that the hardware only uses 20-bits means we have to artificially limit the number of filps or contexts the userspace can create. The ctx->user_handle renaming bits are done with this Cocci patch (plus manual frobbing of the struct declaration): @@ struct intel_context c; @@ - (c).id + c.user_handle @@ struct intel_context *c; @@ - (c)->id + c->user_handle Also, while we are at it, s/DEFAULT_CONTEXT_ID/DEFAULT_CONTEXT_HANDLE and change the type to unsigned 32 bits. v2: s/handle/user_handle and change the type to uint32_t as suggested by Chris Wilson. Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> (v1) Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-07-03 22:28:00 +07:00
seq_printf(m, " context %d:\n", ctx->user_handle);
ppgtt->debug_dump(ppgtt, m);
return 0;
}
static void gen8_ppgtt_info(struct seq_file *m, struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
int i;
if (!ppgtt)
return;
for_each_engine(engine, dev_priv) {
seq_printf(m, "%s\n", engine->name);
for (i = 0; i < 4; i++) {
u64 pdp = I915_READ(GEN8_RING_PDP_UDW(engine, i));
pdp <<= 32;
pdp |= I915_READ(GEN8_RING_PDP_LDW(engine, i));
seq_printf(m, "\tPDP%d 0x%016llx\n", i, pdp);
}
}
}
static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
if (IS_GEN6(dev_priv))
seq_printf(m, "GFX_MODE: 0x%08x\n", I915_READ(GFX_MODE));
for_each_engine(engine, dev_priv) {
seq_printf(m, "%s\n", engine->name);
if (IS_GEN7(dev_priv))
seq_printf(m, "GFX_MODE: 0x%08x\n",
I915_READ(RING_MODE_GEN7(engine)));
seq_printf(m, "PP_DIR_BASE: 0x%08x\n",
I915_READ(RING_PP_DIR_BASE(engine)));
seq_printf(m, "PP_DIR_BASE_READ: 0x%08x\n",
I915_READ(RING_PP_DIR_BASE_READ(engine)));
seq_printf(m, "PP_DIR_DCLV: 0x%08x\n",
I915_READ(RING_PP_DIR_DCLV(engine)));
}
if (dev_priv->mm.aliasing_ppgtt) {
struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
seq_puts(m, "aliasing PPGTT:\n");
seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd.base.ggtt_offset);
ppgtt->debug_dump(ppgtt, m);
}
seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
}
static int i915_ppgtt_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_file *file;
int ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
if (INTEL_INFO(dev)->gen >= 8)
gen8_ppgtt_info(m, dev);
else if (INTEL_INFO(dev)->gen >= 6)
gen6_ppgtt_info(m, dev);
mutex_lock(&dev->filelist_mutex);
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct drm_i915_file_private *file_priv = file->driver_priv;
struct task_struct *task;
task = get_pid_task(file->pid, PIDTYPE_PID);
if (!task) {
ret = -ESRCH;
goto out_unlock;
}
seq_printf(m, "\nproc: %s\n", task->comm);
put_task_struct(task);
idr_for_each(&file_priv->context_idr, per_file_ctx,
(void *)(unsigned long)m);
}
out_unlock:
mutex_unlock(&dev->filelist_mutex);
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
return ret;
}
static int count_irq_waiters(struct drm_i915_private *i915)
{
struct intel_engine_cs *engine;
int count = 0;
for_each_engine(engine, i915)
drm/i915: Slaughter the thundering i915_wait_request herd One particularly stressful scenario consists of many independent tasks all competing for GPU time and waiting upon the results (e.g. realtime transcoding of many, many streams). One bottleneck in particular is that each client waits on its own results, but every client is woken up after every batchbuffer - hence the thunder of hooves as then every client must do its heavyweight dance to read a coherent seqno to see if it is the lucky one. Ideally, we only want one client to wake up after the interrupt and check its request for completion. Since the requests must retire in order, we can select the first client on the oldest request to be woken. Once that client has completed his wait, we can then wake up the next client and so on. However, all clients then incur latency as every process in the chain may be delayed for scheduling - this may also then cause some priority inversion. To reduce the latency, when a client is added or removed from the list, we scan the tree for completed seqno and wake up all the completed waiters in parallel. Using igt/benchmarks/gem_latency, we can demonstrate this effect. The benchmark measures the number of GPU cycles between completion of a batch and the client waking up from a call to wait-ioctl. With many concurrent waiters, with each on a different request, we observe that the wakeup latency before the patch scales nearly linearly with the number of waiters (before external factors kick in making the scaling much worse). After applying the patch, we can see that only the single waiter for the request is being woken up, providing a constant wakeup latency for every operation. However, the situation is not quite as rosy for many waiters on the same request, though to the best of my knowledge this is much less likely in practice. Here, we can observe that the concurrent waiters incur extra latency from being woken up by the solitary bottom-half, rather than directly by the interrupt. This appears to be scheduler induced (having discounted adverse effects from having a rbtree walk/erase in the wakeup path), each additional wake_up_process() costs approximately 1us on big core. Another effect of performing the secondary wakeups from the first bottom-half is the incurred delay this imposes on high priority threads - rather than immediately returning to userspace and leaving the interrupt handler to wake the others. To offset the delay incurred with additional waiters on a request, we could use a hybrid scheme that did a quick read in the interrupt handler and dequeued all the completed waiters (incurring the overhead in the interrupt handler, not the best plan either as we then incur GPU submission latency) but we would still have to wake up the bottom-half every time to do the heavyweight slow read. Or we could only kick the waiters on the seqno with the same priority as the current task (i.e. in the realtime waiter scenario, only it is woken up immediately by the interrupt and simply queues the next waiter before returning to userspace, minimising its delay at the expense of the chain, and also reducing contention on its scheduler runqueue). This is effective at avoid long pauses in the interrupt handler and at avoiding the extra latency in realtime/high-priority waiters. v2: Convert from a kworker per engine into a dedicated kthread for the bottom-half. v3: Rename request members and tweak comments. v4: Use a per-engine spinlock in the breadcrumbs bottom-half. v5: Fix race in locklessly checking waiter status and kicking the task on adding a new waiter. v6: Fix deciding when to force the timer to hide missing interrupts. v7: Move the bottom-half from the kthread to the first client process. v8: Reword a few comments v9: Break the busy loop when the interrupt is unmasked or has fired. v10: Comments, unnecessary churn, better debugging from Tvrtko v11: Wake all completed waiters on removing the current bottom-half to reduce the latency of waking up a herd of clients all waiting on the same request. v12: Rearrange missed-interrupt fault injection so that it works with igt/drv_missed_irq_hang v13: Rename intel_breadcrumb and friends to intel_wait in preparation for signal handling. v14: RCU commentary, assert_spin_locked v15: Hide BUG_ON behind the compiler; report on gem_latency findings. v16: Sort seqno-groups by priority so that first-waiter has the highest task priority (and so avoid priority inversion). v17: Add waiters to post-mortem GPU hang state. v18: Return early for a completed wait after acquiring the spinlock. Avoids adding ourselves to the tree if the is already complete, and skips the awkward question of why we don't do completion wakeups for waits earlier than or equal to ourselves. v19: Prepare for init_breadcrumbs to fail. Later patches may want to allocate during init, so be prepared to propagate back the error code. Testcase: igt/gem_concurrent_blit Testcase: igt/benchmarks/gem_latency Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin@intel.com> Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Dave Gordon <david.s.gordon@intel.com> Cc: "Goel, Akash" <akash.goel@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> #v18 Link: http://patchwork.freedesktop.org/patch/msgid/1467390209-3576-6-git-send-email-chris@chris-wilson.co.uk
2016-07-01 23:23:15 +07:00
count += intel_engine_has_waiter(engine);
return count;
}
static int i915_rps_boost_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_file *file;
seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled);
seq_printf(m, "GPU busy? %s [%x]\n",
yesno(dev_priv->gt.awake), dev_priv->gt.active_engines);
seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
seq_printf(m, "Frequency requested %d; min hard:%d, soft:%d; max soft:%d, hard:%d\n",
intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit),
intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit),
intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
mutex_lock(&dev->filelist_mutex);
spin_lock(&dev_priv->rps.client_lock);
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct drm_i915_file_private *file_priv = file->driver_priv;
struct task_struct *task;
rcu_read_lock();
task = pid_task(file->pid, PIDTYPE_PID);
seq_printf(m, "%s [%d]: %d boosts%s\n",
task ? task->comm : "<unknown>",
task ? task->pid : -1,
file_priv->rps.boosts,
list_empty(&file_priv->rps.link) ? "" : ", active");
rcu_read_unlock();
}
seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);
spin_unlock(&dev_priv->rps.client_lock);
mutex_unlock(&dev->filelist_mutex);
return 0;
}
static int i915_llc(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
const bool edram = INTEL_GEN(dev_priv) > 8;
seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(dev)));
seq_printf(m, "%s: %lluMB\n", edram ? "eDRAM" : "eLLC",
intel_uncore_edram_size(dev_priv)/1024/1024);
return 0;
}
static int i915_guc_load_status_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
u32 tmp, i;
if (!HAS_GUC_UCODE(dev_priv))
return 0;
seq_printf(m, "GuC firmware status:\n");
seq_printf(m, "\tpath: %s\n",
guc_fw->guc_fw_path);
seq_printf(m, "\tfetch: %s\n",
intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status));
seq_printf(m, "\tload: %s\n",
intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
seq_printf(m, "\tversion wanted: %d.%d\n",
guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted);
seq_printf(m, "\tversion found: %d.%d\n",
guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found);
seq_printf(m, "\theader: offset is %d; size = %d\n",
guc_fw->header_offset, guc_fw->header_size);
seq_printf(m, "\tuCode: offset is %d; size = %d\n",
guc_fw->ucode_offset, guc_fw->ucode_size);
seq_printf(m, "\tRSA: offset is %d; size = %d\n",
guc_fw->rsa_offset, guc_fw->rsa_size);
tmp = I915_READ(GUC_STATUS);
seq_printf(m, "\nGuC status 0x%08x:\n", tmp);
seq_printf(m, "\tBootrom status = 0x%x\n",
(tmp & GS_BOOTROM_MASK) >> GS_BOOTROM_SHIFT);
seq_printf(m, "\tuKernel status = 0x%x\n",
(tmp & GS_UKERNEL_MASK) >> GS_UKERNEL_SHIFT);
seq_printf(m, "\tMIA Core status = 0x%x\n",
(tmp & GS_MIA_MASK) >> GS_MIA_SHIFT);
seq_puts(m, "\nScratch registers:\n");
for (i = 0; i < 16; i++)
seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i)));
return 0;
}
static void i915_guc_client_info(struct seq_file *m,
struct drm_i915_private *dev_priv,
struct i915_guc_client *client)
{
struct intel_engine_cs *engine;
uint64_t tot = 0;
seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
client->priority, client->ctx_index, client->proc_desc_offset);
seq_printf(m, "\tDoorbell id %d, offset: 0x%x, cookie 0x%x\n",
client->doorbell_id, client->doorbell_offset, client->cookie);
seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
client->wq_size, client->wq_offset, client->wq_tail);
seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
seq_printf(m, "\tFailed to queue: %u\n", client->q_fail);
seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
seq_printf(m, "\tLast submission result: %d\n", client->retcode);
for_each_engine(engine, dev_priv) {
seq_printf(m, "\tSubmissions: %llu %s\n",
client->submissions[engine->id],
engine->name);
tot += client->submissions[engine->id];
}
seq_printf(m, "\tTotal: %llu\n", tot);
}
static int i915_guc_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_guc guc;
struct i915_guc_client client = {};
struct intel_engine_cs *engine;
u64 total = 0;
if (!HAS_GUC_SCHED(dev_priv))
return 0;
if (mutex_lock_interruptible(&dev->struct_mutex))
return 0;
/* Take a local copy of the GuC data, so we can dump it at leisure */
guc = dev_priv->guc;
if (guc.execbuf_client)
client = *guc.execbuf_client;
mutex_unlock(&dev->struct_mutex);
seq_printf(m, "Doorbell map:\n");
seq_printf(m, "\t%*pb\n", GUC_MAX_DOORBELLS, guc.doorbell_bitmap);
seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc.db_cacheline);
seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
seq_printf(m, "GuC last action command: 0x%x\n", guc.action_cmd);
seq_printf(m, "GuC last action status: 0x%x\n", guc.action_status);
seq_printf(m, "GuC last action error code: %d\n", guc.action_err);
seq_printf(m, "\nGuC submissions:\n");
for_each_engine(engine, dev_priv) {
seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
engine->name, guc.submissions[engine->id],
guc.last_seqno[engine->id]);
total += guc.submissions[engine->id];
}
seq_printf(m, "\t%s: %llu\n", "Total", total);
seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
i915_guc_client_info(m, dev_priv, &client);
/* Add more as required ... */
return 0;
}
static int i915_guc_log_dump(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_object *log_obj = dev_priv->guc.log_obj;
u32 *log;
int i = 0, pg;
if (!log_obj)
return 0;
for (pg = 0; pg < log_obj->base.size / PAGE_SIZE; pg++) {
log = kmap_atomic(i915_gem_object_get_page(log_obj, pg));
for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
*(log + i), *(log + i + 1),
*(log + i + 2), *(log + i + 3));
kunmap_atomic(log);
}
seq_putc(m, '\n');
return 0;
}
static int i915_edp_psr_status(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
u32 psrperf = 0;
u32 stat[3];
enum pipe pipe;
bool enabled = false;
if (!HAS_PSR(dev)) {
seq_puts(m, "PSR not supported\n");
return 0;
}
intel_runtime_pm_get(dev_priv);
mutex_lock(&dev_priv->psr.lock);
seq_printf(m, "Sink_Support: %s\n", yesno(dev_priv->psr.sink_support));
seq_printf(m, "Source_OK: %s\n", yesno(dev_priv->psr.source_ok));
seq_printf(m, "Enabled: %s\n", yesno((bool)dev_priv->psr.enabled));
seq_printf(m, "Active: %s\n", yesno(dev_priv->psr.active));
seq_printf(m, "Busy frontbuffer bits: 0x%03x\n",
dev_priv->psr.busy_frontbuffer_bits);
seq_printf(m, "Re-enable work scheduled: %s\n",
yesno(work_busy(&dev_priv->psr.work.work)));
if (HAS_DDI(dev))
enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE;
else {
for_each_pipe(dev_priv, pipe) {
stat[pipe] = I915_READ(VLV_PSRSTAT(pipe)) &
VLV_EDP_PSR_CURR_STATE_MASK;
if ((stat[pipe] == VLV_EDP_PSR_ACTIVE_NORFB_UP) ||
(stat[pipe] == VLV_EDP_PSR_ACTIVE_SF_UPDATE))
enabled = true;
}
}
seq_printf(m, "Main link in standby mode: %s\n",
yesno(dev_priv->psr.link_standby));
seq_printf(m, "HW Enabled & Active bit: %s", yesno(enabled));
if (!HAS_DDI(dev))
for_each_pipe(dev_priv, pipe) {
if ((stat[pipe] == VLV_EDP_PSR_ACTIVE_NORFB_UP) ||
(stat[pipe] == VLV_EDP_PSR_ACTIVE_SF_UPDATE))
seq_printf(m, " pipe %c", pipe_name(pipe));
}
seq_puts(m, "\n");
/*
* VLV/CHV PSR has no kind of performance counter
* SKL+ Perf counter is reset to 0 everytime DC state is entered
*/
if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
psrperf = I915_READ(EDP_PSR_PERF_CNT) &
EDP_PSR_PERF_CNT_MASK;
seq_printf(m, "Performance_Counter: %u\n", psrperf);
}
mutex_unlock(&dev_priv->psr.lock);
intel_runtime_pm_put(dev_priv);
return 0;
}
static int i915_sink_crc(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct intel_connector *connector;
struct intel_dp *intel_dp = NULL;
int ret;
u8 crc[6];
drm_modeset_lock_all(dev);
for_each_intel_connector(dev, connector) {
struct drm_crtc *crtc;
if (!connector->base.state->best_encoder)
continue;
crtc = connector->base.state->crtc;
if (!crtc->state->active)
drm/i915: don't reference null pointer at i915_sink_crc Reproducible by runtime suspending a Haswell machine with eDP + HDMI outputs connected. [ 209.600086] [drm:i915_runtime_suspend], Suspending device [ 209.688435] BUG: unable to handle kernel NULL pointer dereference at 0000000000000060 [ 209.688500] IP: [<ffffffffa0109d4e>] i915_sink_crc+0x6e/0xf0 [i915] [ 209.688577] PGD 36aba067 PUD 35d7f067 PMD 0 [ 209.688613] Oops: 0000 [#1] SMP [ 209.688641] Modules linked in: fuse ip6table_filter ip6_tables ebtable_nat ebtables iTCO_wdt iTCO_vendor_support x86_pkg_temp_thermal coretemp microcode serio_raw e1000e pcspkr i2c_i801 ptp mei_me mei lpc_ich mfd_core pps_core dm_crypt i915 i2c_algo_bit crc32_pclmul drm_kms_helper crc32c_intel drm ghash_clmulni_intel video [ 209.688893] CPU: 1 PID: 1797 Comm: pm_pc8 Not tainted 3.13.0+ #118 [ 209.688937] Hardware name: Intel Corporation Shark Bay Client platform/WhiteTip Mountain 1, BIOS HSWLPTU1.86C.0133.R00.1309172123 09/17/2013 [ 209.689023] task: ffff88007fb4b690 ti: ffff88007d9d2000 task.ti: ffff88007d9d2000 [ 209.689074] RIP: 0010:[<ffffffffa0109d4e>] [<ffffffffa0109d4e>] i915_sink_crc+0x6e/0xf0 [i915] [ 209.689169] RSP: 0018:ffff88007d9d3e68 EFLAGS: 00010246 [ 209.689205] RAX: 0000000000000000 RBX: ffff880036a03478 RCX: ffff8800366c9770 [ 209.689252] RDX: ffff88014325cf38 RSI: ffff88007fb4bd08 RDI: ffff88007fb4b690 [ 209.689299] RBP: ffff88007d9d3e98 R08: 0000000000000000 R09: 0000000000000000 [ 209.689346] R10: 0000000000000001 R11: 0000000000000000 R12: ffff8800366c9148 [ 209.689393] R13: 00000000ffffffed R14: ffff88007d9d3f50 R15: ffff880036a03478 [ 209.689441] FS: 00007f5a74bc29c0(0000) GS:ffff88014f240000(0000) knlGS:0000000000000000 [ 209.689494] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 209.689533] CR2: 0000000000000060 CR3: 0000000079d7e000 CR4: 00000000001407e0 [ 209.689580] Stack: [ 209.689594] 0000000000001000 ffff880146083980 ffff880146083980 0000000000000000 [ 209.689649] ffff880146083980 0000000000000001 ffff88007d9d3f00 ffffffff811d0744 [ 209.689702] 0000000000000046 00007fff7949fe20 ffff880036a034b8 0000000000000080 [ 209.689756] Call Trace: [ 209.689778] [<ffffffff811d0744>] seq_read+0x164/0x3e0 [ 209.689816] [<ffffffff811ab165>] vfs_read+0x95/0x160 [ 209.689851] [<ffffffff811abc79>] SyS_read+0x49/0xa0 [ 209.689888] [<ffffffff810ef64c>] ? __audit_syscall_entry+0x9c/0xf0 [ 209.689933] [<ffffffff81659412>] system_call_fastpath+0x16/0x1b Testcase: igt/pm_pc8 (do a full run, it will fail at the debugfs-read subtest) Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com> [danvet: Flip around NULL check for robustness.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-02-14 02:51:33 +07:00
continue;
if (connector->base.connector_type != DRM_MODE_CONNECTOR_eDP)
continue;
intel_dp = enc_to_intel_dp(connector->base.state->best_encoder);
ret = intel_dp_sink_crc(intel_dp, crc);
if (ret)
goto out;
seq_printf(m, "%02x%02x%02x%02x%02x%02x\n",
crc[0], crc[1], crc[2],
crc[3], crc[4], crc[5]);
goto out;
}
ret = -ENODEV;
out:
drm_modeset_unlock_all(dev);
return ret;
}
static int i915_energy_uJ(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
u64 power;
u32 units;
if (INTEL_INFO(dev)->gen < 6)
return -ENODEV;
intel_runtime_pm_get(dev_priv);
rdmsrl(MSR_RAPL_POWER_UNIT, power);
power = (power & 0x1f00) >> 8;
units = 1000000 / (1 << power); /* convert to uJ */
power = I915_READ(MCH_SECP_NRG_STTS);
power *= units;
intel_runtime_pm_put(dev_priv);
seq_printf(m, "%llu", (long long unsigned)power);
return 0;
}
static int i915_runtime_pm_status(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
if (!HAS_RUNTIME_PM(dev_priv))
seq_puts(m, "Runtime power management not supported\n");
seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
seq_printf(m, "IRQs disabled: %s\n",
yesno(!intel_irqs_enabled(dev_priv)));
#ifdef CONFIG_PM
seq_printf(m, "Usage count: %d\n",
atomic_read(&dev->dev->power.usage_count));
#else
seq_printf(m, "Device Power Management (CONFIG_PM) disabled\n");
#endif
seq_printf(m, "PCI device power state: %s [%d]\n",
pci_power_name(dev_priv->drm.pdev->current_state),
dev_priv->drm.pdev->current_state);
return 0;
}
static int i915_power_domain_info(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_power_domains *power_domains = &dev_priv->power_domains;
int i;
mutex_lock(&power_domains->lock);
seq_printf(m, "%-25s %s\n", "Power well/domain", "Use count");
for (i = 0; i < power_domains->power_well_count; i++) {
struct i915_power_well *power_well;
enum intel_display_power_domain power_domain;
power_well = &power_domains->power_wells[i];
seq_printf(m, "%-25s %d\n", power_well->name,
power_well->count);
for (power_domain = 0; power_domain < POWER_DOMAIN_NUM;
power_domain++) {
if (!(BIT(power_domain) & power_well->domains))
continue;
seq_printf(m, " %-23s %d\n",
intel_display_power_domain_str(power_domain),
power_domains->domain_use_count[power_domain]);
}
}
mutex_unlock(&power_domains->lock);
return 0;
}
static int i915_dmc_info(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_csr *csr;
if (!HAS_CSR(dev)) {
seq_puts(m, "not supported\n");
return 0;
}
csr = &dev_priv->csr;
intel_runtime_pm_get(dev_priv);
seq_printf(m, "fw loaded: %s\n", yesno(csr->dmc_payload != NULL));
seq_printf(m, "path: %s\n", csr->fw_path);
if (!csr->dmc_payload)
goto out;
seq_printf(m, "version: %d.%d\n", CSR_VERSION_MAJOR(csr->version),
CSR_VERSION_MINOR(csr->version));
if (IS_SKYLAKE(dev) && csr->version >= CSR_VERSION(1, 6)) {
seq_printf(m, "DC3 -> DC5 count: %d\n",
I915_READ(SKL_CSR_DC3_DC5_COUNT));
seq_printf(m, "DC5 -> DC6 count: %d\n",
I915_READ(SKL_CSR_DC5_DC6_COUNT));
} else if (IS_BROXTON(dev) && csr->version >= CSR_VERSION(1, 4)) {
seq_printf(m, "DC3 -> DC5 count: %d\n",
I915_READ(BXT_CSR_DC3_DC5_COUNT));
}
out:
seq_printf(m, "program base: 0x%08x\n", I915_READ(CSR_PROGRAM(0)));
seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE));
seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL));
intel_runtime_pm_put(dev_priv);
return 0;
}
static void intel_seq_print_mode(struct seq_file *m, int tabs,
struct drm_display_mode *mode)
{
int i;
for (i = 0; i < tabs; i++)
seq_putc(m, '\t');
seq_printf(m, "id %d:\"%s\" freq %d clock %d hdisp %d hss %d hse %d htot %d vdisp %d vss %d vse %d vtot %d type 0x%x flags 0x%x\n",
mode->base.id, mode->name,
mode->vrefresh, mode->clock,
mode->hdisplay, mode->hsync_start,
mode->hsync_end, mode->htotal,
mode->vdisplay, mode->vsync_start,
mode->vsync_end, mode->vtotal,
mode->type, mode->flags);
}
static void intel_encoder_info(struct seq_file *m,
struct intel_crtc *intel_crtc,
struct intel_encoder *intel_encoder)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_crtc *crtc = &intel_crtc->base;
struct intel_connector *intel_connector;
struct drm_encoder *encoder;
encoder = &intel_encoder->base;
seq_printf(m, "\tencoder %d: type: %s, connectors:\n",
encoder->base.id, encoder->name);
for_each_connector_on_encoder(dev, encoder, intel_connector) {
struct drm_connector *connector = &intel_connector->base;
seq_printf(m, "\t\tconnector %d: type: %s, status: %s",
connector->base.id,
connector->name,
drm_get_connector_status_name(connector->status));
if (connector->status == connector_status_connected) {
struct drm_display_mode *mode = &crtc->mode;
seq_printf(m, ", mode:\n");
intel_seq_print_mode(m, 2, mode);
} else {
seq_putc(m, '\n');
}
}
}
static void intel_crtc_info(struct seq_file *m, struct intel_crtc *intel_crtc)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_crtc *crtc = &intel_crtc->base;
struct intel_encoder *intel_encoder;
struct drm_plane_state *plane_state = crtc->primary->state;
struct drm_framebuffer *fb = plane_state->fb;
if (fb)
seq_printf(m, "\tfb: %d, pos: %dx%d, size: %dx%d\n",
fb->base.id, plane_state->src_x >> 16,
plane_state->src_y >> 16, fb->width, fb->height);
else
seq_puts(m, "\tprimary plane disabled\n");
for_each_encoder_on_crtc(dev, crtc, intel_encoder)
intel_encoder_info(m, intel_crtc, intel_encoder);
}
static void intel_panel_info(struct seq_file *m, struct intel_panel *panel)
{
struct drm_display_mode *mode = panel->fixed_mode;
seq_printf(m, "\tfixed mode:\n");
intel_seq_print_mode(m, 2, mode);
}
static void intel_dp_info(struct seq_file *m,
struct intel_connector *intel_connector)
{
struct intel_encoder *intel_encoder = intel_connector->encoder;
struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
seq_printf(m, "\tDPCD rev: %x\n", intel_dp->dpcd[DP_DPCD_REV]);
seq_printf(m, "\taudio support: %s\n", yesno(intel_dp->has_audio));
if (intel_connector->base.connector_type == DRM_MODE_CONNECTOR_eDP)
intel_panel_info(m, &intel_connector->panel);
}
static void intel_hdmi_info(struct seq_file *m,
struct intel_connector *intel_connector)
{
struct intel_encoder *intel_encoder = intel_connector->encoder;
struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&intel_encoder->base);
seq_printf(m, "\taudio support: %s\n", yesno(intel_hdmi->has_audio));
}
static void intel_lvds_info(struct seq_file *m,
struct intel_connector *intel_connector)
{
intel_panel_info(m, &intel_connector->panel);
}
static void intel_connector_info(struct seq_file *m,
struct drm_connector *connector)
{
struct intel_connector *intel_connector = to_intel_connector(connector);
struct intel_encoder *intel_encoder = intel_connector->encoder;
struct drm_display_mode *mode;
seq_printf(m, "connector %d: type %s, status: %s\n",
connector->base.id, connector->name,
drm_get_connector_status_name(connector->status));
if (connector->status == connector_status_connected) {
seq_printf(m, "\tname: %s\n", connector->display_info.name);
seq_printf(m, "\tphysical dimensions: %dx%dmm\n",
connector->display_info.width_mm,
connector->display_info.height_mm);
seq_printf(m, "\tsubpixel order: %s\n",
drm_get_subpixel_order_name(connector->display_info.subpixel_order));
seq_printf(m, "\tCEA rev: %d\n",
connector->display_info.cea_rev);
}
if (!intel_encoder || intel_encoder->type == INTEL_OUTPUT_DP_MST)
return;
switch (connector->connector_type) {
case DRM_MODE_CONNECTOR_DisplayPort:
case DRM_MODE_CONNECTOR_eDP:
intel_dp_info(m, intel_connector);
break;
case DRM_MODE_CONNECTOR_LVDS:
if (intel_encoder->type == INTEL_OUTPUT_LVDS)
intel_lvds_info(m, intel_connector);
break;
case DRM_MODE_CONNECTOR_HDMIA:
if (intel_encoder->type == INTEL_OUTPUT_HDMI ||
intel_encoder->type == INTEL_OUTPUT_UNKNOWN)
intel_hdmi_info(m, intel_connector);
break;
default:
break;
}
seq_printf(m, "\tmodes:\n");
list_for_each_entry(mode, &connector->modes, head)
intel_seq_print_mode(m, 2, mode);
}
static bool cursor_active(struct drm_device *dev, int pipe)
{
struct drm_i915_private *dev_priv = to_i915(dev);
u32 state;
if (IS_845G(dev) || IS_I865G(dev))
state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE;
else
state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
return state;
}
static bool cursor_position(struct drm_device *dev, int pipe, int *x, int *y)
{
struct drm_i915_private *dev_priv = to_i915(dev);
u32 pos;
pos = I915_READ(CURPOS(pipe));
*x = (pos >> CURSOR_X_SHIFT) & CURSOR_POS_MASK;
if (pos & (CURSOR_POS_SIGN << CURSOR_X_SHIFT))
*x = -*x;
*y = (pos >> CURSOR_Y_SHIFT) & CURSOR_POS_MASK;
if (pos & (CURSOR_POS_SIGN << CURSOR_Y_SHIFT))
*y = -*y;
return cursor_active(dev, pipe);
}
static const char *plane_type(enum drm_plane_type type)
{
switch (type) {
case DRM_PLANE_TYPE_OVERLAY:
return "OVL";
case DRM_PLANE_TYPE_PRIMARY:
return "PRI";
case DRM_PLANE_TYPE_CURSOR:
return "CUR";
/*
* Deliberately omitting default: to generate compiler warnings
* when a new drm_plane_type gets added.
*/
}
return "unknown";
}
static const char *plane_rotation(unsigned int rotation)
{
static char buf[48];
/*
* According to doc only one DRM_ROTATE_ is allowed but this
* will print them all to visualize if the values are misused
*/
snprintf(buf, sizeof(buf),
"%s%s%s%s%s%s(0x%08x)",
(rotation & BIT(DRM_ROTATE_0)) ? "0 " : "",
(rotation & BIT(DRM_ROTATE_90)) ? "90 " : "",
(rotation & BIT(DRM_ROTATE_180)) ? "180 " : "",
(rotation & BIT(DRM_ROTATE_270)) ? "270 " : "",
(rotation & BIT(DRM_REFLECT_X)) ? "FLIPX " : "",
(rotation & BIT(DRM_REFLECT_Y)) ? "FLIPY " : "",
rotation);
return buf;
}
static void intel_plane_info(struct seq_file *m, struct intel_crtc *intel_crtc)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct intel_plane *intel_plane;
for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
struct drm_plane_state *state;
struct drm_plane *plane = &intel_plane->base;
if (!plane->state) {
seq_puts(m, "plane->state is NULL!\n");
continue;
}
state = plane->state;
seq_printf(m, "\t--Plane id %d: type=%s, crtc_pos=%4dx%4d, crtc_size=%4dx%4d, src_pos=%d.%04ux%d.%04u, src_size=%d.%04ux%d.%04u, format=%s, rotation=%s\n",
plane->base.id,
plane_type(intel_plane->base.type),
state->crtc_x, state->crtc_y,
state->crtc_w, state->crtc_h,
(state->src_x >> 16),
((state->src_x & 0xffff) * 15625) >> 10,
(state->src_y >> 16),
((state->src_y & 0xffff) * 15625) >> 10,
(state->src_w >> 16),
((state->src_w & 0xffff) * 15625) >> 10,
(state->src_h >> 16),
((state->src_h & 0xffff) * 15625) >> 10,
state->fb ? drm_get_format_name(state->fb->pixel_format) : "N/A",
plane_rotation(state->rotation));
}
}
static void intel_scaler_info(struct seq_file *m, struct intel_crtc *intel_crtc)
{
struct intel_crtc_state *pipe_config;
int num_scalers = intel_crtc->num_scalers;
int i;
pipe_config = to_intel_crtc_state(intel_crtc->base.state);
/* Not all platformas have a scaler */
if (num_scalers) {
seq_printf(m, "\tnum_scalers=%d, scaler_users=%x scaler_id=%d",
num_scalers,
pipe_config->scaler_state.scaler_users,
pipe_config->scaler_state.scaler_id);
for (i = 0; i < SKL_NUM_SCALERS; i++) {
struct intel_scaler *sc =
&pipe_config->scaler_state.scalers[i];
seq_printf(m, ", scalers[%d]: use=%s, mode=%x",
i, yesno(sc->in_use), sc->mode);
}
seq_puts(m, "\n");
} else {
seq_puts(m, "\tNo scalers available on this platform\n");
}
}
static int i915_display_info(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *crtc;
struct drm_connector *connector;
intel_runtime_pm_get(dev_priv);
drm_modeset_lock_all(dev);
seq_printf(m, "CRTC info\n");
seq_printf(m, "---------\n");
for_each_intel_crtc(dev, crtc) {
bool active;
struct intel_crtc_state *pipe_config;
int x, y;
pipe_config = to_intel_crtc_state(crtc->base.state);
seq_printf(m, "CRTC %d: pipe: %c, active=%s, (size=%dx%d), dither=%s, bpp=%d\n",
crtc->base.base.id, pipe_name(crtc->pipe),
yesno(pipe_config->base.active),
pipe_config->pipe_src_w, pipe_config->pipe_src_h,
yesno(pipe_config->dither), pipe_config->pipe_bpp);
if (pipe_config->base.active) {
intel_crtc_info(m, crtc);
active = cursor_position(dev, crtc->pipe, &x, &y);
seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x, active? %s\n",
yesno(crtc->cursor_base),
drm/i915: Kill intel_crtc->cursor_{width, height} (v2) The cursor size fields in intel_crtc just duplicate the data from cursor->state.crtc_{w,h} so we don't need them any more. Worse, their use in the watermark code actually introduces a subtle bug since they don't get updated to mirror the state values until the plane commit stage, which is *after* we've already used them to calculate new watermark values. This happens because we had to move watermark updates slightly earlier (outside vblank evasion) in commit commit 32b7eeec4d1e861230b09d437e95d76c86ff4a68 Author: Matt Roper <matthew.d.roper@intel.com> Date: Wed Dec 24 07:59:06 2014 -0800 drm/i915: Refactor work that can sleep out of commit (v7) Dropping the intel_crtc fields and just using the state values (which are properly updated by the time watermark updates happen) should solve the problem. Aside from the actual removal of the struct fields (which are formatted in a way that I couldn't figure out how to match in Coccinelle), the rest of this patch was generated via the following semantic patch: // Drop assignment @@ struct intel_crtc *C; struct drm_plane_state S; @@ ( - C->cursor_width = S.crtc_w; | - C->cursor_height = S.crtc_h; ) // Replace usage @@ struct intel_crtc *C; expression E; @@ ( - C->cursor_width + C->base.cursor->state->crtc_w | - C->cursor_height + C->base.cursor->state->crtc_h | - to_intel_crtc(E)->cursor_width + E->cursor->state->crtc_w | - to_intel_crtc(E)->cursor_height + E->cursor->state->crtc_h ) v2: Rebase Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Joe Konno <joe.konno@linux.intel.com> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89346 Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-28 01:12:00 +07:00
x, y, crtc->base.cursor->state->crtc_w,
crtc->base.cursor->state->crtc_h,
crtc->cursor_addr, yesno(active));
intel_scaler_info(m, crtc);
intel_plane_info(m, crtc);
}
seq_printf(m, "\tunderrun reporting: cpu=%s pch=%s \n",
yesno(!crtc->cpu_fifo_underrun_disabled),
yesno(!crtc->pch_fifo_underrun_disabled));
}
seq_printf(m, "\n");
seq_printf(m, "Connector info\n");
seq_printf(m, "--------------\n");
list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
intel_connector_info(m, connector);
}
drm_modeset_unlock_all(dev);
intel_runtime_pm_put(dev_priv);
return 0;
}
static int i915_semaphore_status(struct seq_file *m, void *unused)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
int num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
enum intel_engine_id id;
int j, ret;
if (!i915.semaphores) {
seq_puts(m, "Semaphores are disabled\n");
return 0;
}
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
if (IS_BROADWELL(dev)) {
struct page *page;
uint64_t *seqno;
page = i915_gem_object_get_page(dev_priv->semaphore_obj, 0);
seqno = (uint64_t *)kmap_atomic(page);
for_each_engine_id(engine, dev_priv, id) {
uint64_t offset;
seq_printf(m, "%s\n", engine->name);
seq_puts(m, " Last signal:");
for (j = 0; j < num_rings; j++) {
offset = id * I915_NUM_ENGINES + j;
seq_printf(m, "0x%08llx (0x%02llx) ",
seqno[offset], offset * 8);
}
seq_putc(m, '\n');
seq_puts(m, " Last wait: ");
for (j = 0; j < num_rings; j++) {
offset = id + (j * I915_NUM_ENGINES);
seq_printf(m, "0x%08llx (0x%02llx) ",
seqno[offset], offset * 8);
}
seq_putc(m, '\n');
}
kunmap_atomic(seqno);
} else {
seq_puts(m, " Last signal:");
for_each_engine(engine, dev_priv)
for (j = 0; j < num_rings; j++)
seq_printf(m, "0x%08x\n",
I915_READ(engine->semaphore.mbox.signal[j]));
seq_putc(m, '\n');
}
seq_puts(m, "\nSync seqno:\n");
for_each_engine(engine, dev_priv) {
for (j = 0; j < num_rings; j++)
seq_printf(m, " 0x%08x ",
engine->semaphore.sync_seqno[j]);
seq_putc(m, '\n');
}
seq_putc(m, '\n');
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
return 0;
}
static int i915_shared_dplls_info(struct seq_file *m, void *unused)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
int i;
drm_modeset_lock_all(dev);
for (i = 0; i < dev_priv->num_shared_dpll; i++) {
struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i];
seq_printf(m, "DPLL%i: %s, id: %i\n", i, pll->name, pll->id);
seq_printf(m, " crtc_mask: 0x%08x, active: 0x%x, on: %s\n",
pll->config.crtc_mask, pll->active_mask, yesno(pll->on));
seq_printf(m, " tracked hardware state:\n");
seq_printf(m, " dpll: 0x%08x\n", pll->config.hw_state.dpll);
seq_printf(m, " dpll_md: 0x%08x\n",
pll->config.hw_state.dpll_md);
seq_printf(m, " fp0: 0x%08x\n", pll->config.hw_state.fp0);
seq_printf(m, " fp1: 0x%08x\n", pll->config.hw_state.fp1);
seq_printf(m, " wrpll: 0x%08x\n", pll->config.hw_state.wrpll);
}
drm_modeset_unlock_all(dev);
return 0;
}
static int i915_wa_registers(struct seq_file *m, void *unused)
{
int i;
int ret;
struct intel_engine_cs *engine;
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
drm/i915/gen9: Add framework to whitelist specific GPU registers Some of the HW registers are privileged and cannot be written to from non-privileged batch buffers coming from userspace unless they are added to the HW whitelist. This whitelist is maintained by HW and it is different from SW whitelist. Userspace need write access to them to implement preemption related WA. The reason for using this approach is, the register bits that control preemption granularity at the HW level are not context save/restored; so even if we set these bits always in kernel they are going to change once the context is switched out. We can consider making them non-privileged by default but these registers also contain other chicken bits which should not be allowed to be modified. In the later revisions controlling bits are save/restored at context level but in the existing revisions these are exported via other debug registers and should be on the whitelist. This patch adds changes to provide HW with a list of registers to be whitelisted. HW checks this list during execution and provides access accordingly. HW imposes a limit on the number of registers on whitelist and it is per-engine. At this point we are only enabling whitelist for RCS and we don't foresee any requirement for other engines. The registers to be whitelisted are added using generic workaround list mechanism, even these are only enablers for userspace workarounds. But by sharing this mechanism we get some test assets without additional cost (Mika). v2: rebase v3: parameterize RING_FORCE_TO_NONPRIV() as _MMIO() should be limited to i915_reg.h (Ville), drop inline for wa_ring_whitelist_reg (Mika). v4: improvements suggested by Chris Wilson. Clarify that this is HW whitelist and different from the one maintained in driver. This list is engine specific but it gets initialized along with other WA which is RCS specific thing, so make it clear that we are not doing any cross engine setup during initialization. Make HW whitelist count of each engine available in debugfs. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1453412634-29238-2-git-send-email-arun.siluvery@linux.intel.com Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2016-01-22 04:43:47 +07:00
struct i915_workarounds *workarounds = &dev_priv->workarounds;
enum intel_engine_id id;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
drm/i915/gen9: Add framework to whitelist specific GPU registers Some of the HW registers are privileged and cannot be written to from non-privileged batch buffers coming from userspace unless they are added to the HW whitelist. This whitelist is maintained by HW and it is different from SW whitelist. Userspace need write access to them to implement preemption related WA. The reason for using this approach is, the register bits that control preemption granularity at the HW level are not context save/restored; so even if we set these bits always in kernel they are going to change once the context is switched out. We can consider making them non-privileged by default but these registers also contain other chicken bits which should not be allowed to be modified. In the later revisions controlling bits are save/restored at context level but in the existing revisions these are exported via other debug registers and should be on the whitelist. This patch adds changes to provide HW with a list of registers to be whitelisted. HW checks this list during execution and provides access accordingly. HW imposes a limit on the number of registers on whitelist and it is per-engine. At this point we are only enabling whitelist for RCS and we don't foresee any requirement for other engines. The registers to be whitelisted are added using generic workaround list mechanism, even these are only enablers for userspace workarounds. But by sharing this mechanism we get some test assets without additional cost (Mika). v2: rebase v3: parameterize RING_FORCE_TO_NONPRIV() as _MMIO() should be limited to i915_reg.h (Ville), drop inline for wa_ring_whitelist_reg (Mika). v4: improvements suggested by Chris Wilson. Clarify that this is HW whitelist and different from the one maintained in driver. This list is engine specific but it gets initialized along with other WA which is RCS specific thing, so make it clear that we are not doing any cross engine setup during initialization. Make HW whitelist count of each engine available in debugfs. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1453412634-29238-2-git-send-email-arun.siluvery@linux.intel.com Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2016-01-22 04:43:47 +07:00
seq_printf(m, "Workarounds applied: %d\n", workarounds->count);
for_each_engine_id(engine, dev_priv, id)
drm/i915/gen9: Add framework to whitelist specific GPU registers Some of the HW registers are privileged and cannot be written to from non-privileged batch buffers coming from userspace unless they are added to the HW whitelist. This whitelist is maintained by HW and it is different from SW whitelist. Userspace need write access to them to implement preemption related WA. The reason for using this approach is, the register bits that control preemption granularity at the HW level are not context save/restored; so even if we set these bits always in kernel they are going to change once the context is switched out. We can consider making them non-privileged by default but these registers also contain other chicken bits which should not be allowed to be modified. In the later revisions controlling bits are save/restored at context level but in the existing revisions these are exported via other debug registers and should be on the whitelist. This patch adds changes to provide HW with a list of registers to be whitelisted. HW checks this list during execution and provides access accordingly. HW imposes a limit on the number of registers on whitelist and it is per-engine. At this point we are only enabling whitelist for RCS and we don't foresee any requirement for other engines. The registers to be whitelisted are added using generic workaround list mechanism, even these are only enablers for userspace workarounds. But by sharing this mechanism we get some test assets without additional cost (Mika). v2: rebase v3: parameterize RING_FORCE_TO_NONPRIV() as _MMIO() should be limited to i915_reg.h (Ville), drop inline for wa_ring_whitelist_reg (Mika). v4: improvements suggested by Chris Wilson. Clarify that this is HW whitelist and different from the one maintained in driver. This list is engine specific but it gets initialized along with other WA which is RCS specific thing, so make it clear that we are not doing any cross engine setup during initialization. Make HW whitelist count of each engine available in debugfs. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1453412634-29238-2-git-send-email-arun.siluvery@linux.intel.com Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2016-01-22 04:43:47 +07:00
seq_printf(m, "HW whitelist count for %s: %d\n",
engine->name, workarounds->hw_whitelist_count[id]);
drm/i915/gen9: Add framework to whitelist specific GPU registers Some of the HW registers are privileged and cannot be written to from non-privileged batch buffers coming from userspace unless they are added to the HW whitelist. This whitelist is maintained by HW and it is different from SW whitelist. Userspace need write access to them to implement preemption related WA. The reason for using this approach is, the register bits that control preemption granularity at the HW level are not context save/restored; so even if we set these bits always in kernel they are going to change once the context is switched out. We can consider making them non-privileged by default but these registers also contain other chicken bits which should not be allowed to be modified. In the later revisions controlling bits are save/restored at context level but in the existing revisions these are exported via other debug registers and should be on the whitelist. This patch adds changes to provide HW with a list of registers to be whitelisted. HW checks this list during execution and provides access accordingly. HW imposes a limit on the number of registers on whitelist and it is per-engine. At this point we are only enabling whitelist for RCS and we don't foresee any requirement for other engines. The registers to be whitelisted are added using generic workaround list mechanism, even these are only enablers for userspace workarounds. But by sharing this mechanism we get some test assets without additional cost (Mika). v2: rebase v3: parameterize RING_FORCE_TO_NONPRIV() as _MMIO() should be limited to i915_reg.h (Ville), drop inline for wa_ring_whitelist_reg (Mika). v4: improvements suggested by Chris Wilson. Clarify that this is HW whitelist and different from the one maintained in driver. This list is engine specific but it gets initialized along with other WA which is RCS specific thing, so make it clear that we are not doing any cross engine setup during initialization. Make HW whitelist count of each engine available in debugfs. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1453412634-29238-2-git-send-email-arun.siluvery@linux.intel.com Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2016-01-22 04:43:47 +07:00
for (i = 0; i < workarounds->count; ++i) {
drm/i915: Type safe register read/write Make I915_READ and I915_WRITE more type safe by wrapping the register offset in a struct. This should eliminate most of the fumbles we've had with misplaced parens. This only takes care of normal mmio registers. We could extend the idea to other register types and define each with its own struct. That way you wouldn't be able to accidentally pass the wrong thing to a specific register access function. The gpio_reg setup is probably the ugliest thing left. But I figure I'd just leave it for now, and wait for some divine inspiration to strike before making it nice. As for the generated code, it's actually a bit better sometimes. Eg. looking at i915_irq_handler(), we can see the following change: lea 0x70024(%rdx,%rax,1),%r9d mov $0x1,%edx - movslq %r9d,%r9 - mov %r9,%rsi - mov %r9,-0x58(%rbp) - callq *0xd8(%rbx) + mov %r9d,%esi + mov %r9d,-0x48(%rbp) callq *0xd8(%rbx) So previously gcc thought the register offset might be signed and decided to sign extend it, just in case. The rest appears to be mostly just minor shuffling of instructions. v2: i915_mmio_reg_{offset,equal,valid}() helpers added s/_REG/_MMIO/ in the register defines mo more switch statements left to worry about ring_emit stuff got sorted in a prep patch cmd parser, lrc context and w/a batch buildup also in prep patch vgpu stuff cleaned up and moved to a prep patch all other unrelated changes split out v3: Rebased due to BXT DSI/BLC, MOCS, etc. v4: Rebased due to churn, s/i915_mmio_reg_t/i915_reg_t/ Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: http://patchwork.freedesktop.org/patch/msgid/1447853606-2751-1-git-send-email-ville.syrjala@linux.intel.com
2015-11-18 20:33:26 +07:00
i915_reg_t addr;
u32 mask, value, read;
bool ok;
drm/i915/gen9: Add framework to whitelist specific GPU registers Some of the HW registers are privileged and cannot be written to from non-privileged batch buffers coming from userspace unless they are added to the HW whitelist. This whitelist is maintained by HW and it is different from SW whitelist. Userspace need write access to them to implement preemption related WA. The reason for using this approach is, the register bits that control preemption granularity at the HW level are not context save/restored; so even if we set these bits always in kernel they are going to change once the context is switched out. We can consider making them non-privileged by default but these registers also contain other chicken bits which should not be allowed to be modified. In the later revisions controlling bits are save/restored at context level but in the existing revisions these are exported via other debug registers and should be on the whitelist. This patch adds changes to provide HW with a list of registers to be whitelisted. HW checks this list during execution and provides access accordingly. HW imposes a limit on the number of registers on whitelist and it is per-engine. At this point we are only enabling whitelist for RCS and we don't foresee any requirement for other engines. The registers to be whitelisted are added using generic workaround list mechanism, even these are only enablers for userspace workarounds. But by sharing this mechanism we get some test assets without additional cost (Mika). v2: rebase v3: parameterize RING_FORCE_TO_NONPRIV() as _MMIO() should be limited to i915_reg.h (Ville), drop inline for wa_ring_whitelist_reg (Mika). v4: improvements suggested by Chris Wilson. Clarify that this is HW whitelist and different from the one maintained in driver. This list is engine specific but it gets initialized along with other WA which is RCS specific thing, so make it clear that we are not doing any cross engine setup during initialization. Make HW whitelist count of each engine available in debugfs. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1453412634-29238-2-git-send-email-arun.siluvery@linux.intel.com Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2016-01-22 04:43:47 +07:00
addr = workarounds->reg[i].addr;
mask = workarounds->reg[i].mask;
value = workarounds->reg[i].value;
read = I915_READ(addr);
ok = (value & mask) == (read & mask);
seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X, read: 0x%08x, status: %s\n",
drm/i915: Type safe register read/write Make I915_READ and I915_WRITE more type safe by wrapping the register offset in a struct. This should eliminate most of the fumbles we've had with misplaced parens. This only takes care of normal mmio registers. We could extend the idea to other register types and define each with its own struct. That way you wouldn't be able to accidentally pass the wrong thing to a specific register access function. The gpio_reg setup is probably the ugliest thing left. But I figure I'd just leave it for now, and wait for some divine inspiration to strike before making it nice. As for the generated code, it's actually a bit better sometimes. Eg. looking at i915_irq_handler(), we can see the following change: lea 0x70024(%rdx,%rax,1),%r9d mov $0x1,%edx - movslq %r9d,%r9 - mov %r9,%rsi - mov %r9,-0x58(%rbp) - callq *0xd8(%rbx) + mov %r9d,%esi + mov %r9d,-0x48(%rbp) callq *0xd8(%rbx) So previously gcc thought the register offset might be signed and decided to sign extend it, just in case. The rest appears to be mostly just minor shuffling of instructions. v2: i915_mmio_reg_{offset,equal,valid}() helpers added s/_REG/_MMIO/ in the register defines mo more switch statements left to worry about ring_emit stuff got sorted in a prep patch cmd parser, lrc context and w/a batch buildup also in prep patch vgpu stuff cleaned up and moved to a prep patch all other unrelated changes split out v3: Rebased due to BXT DSI/BLC, MOCS, etc. v4: Rebased due to churn, s/i915_mmio_reg_t/i915_reg_t/ Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Link: http://patchwork.freedesktop.org/patch/msgid/1447853606-2751-1-git-send-email-ville.syrjala@linux.intel.com
2015-11-18 20:33:26 +07:00
i915_mmio_reg_offset(addr), value, mask, read, ok ? "OK" : "FAIL");
}
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
return 0;
}
static int i915_ddb_info(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct skl_ddb_allocation *ddb;
struct skl_ddb_entry *entry;
enum pipe pipe;
int plane;
if (INTEL_INFO(dev)->gen < 9)
return 0;
drm_modeset_lock_all(dev);
ddb = &dev_priv->wm.skl_hw.ddb;
seq_printf(m, "%-15s%8s%8s%8s\n", "", "Start", "End", "Size");
for_each_pipe(dev_priv, pipe) {
seq_printf(m, "Pipe %c\n", pipe_name(pipe));
for_each_plane(dev_priv, pipe, plane) {
entry = &ddb->plane[pipe][plane];
seq_printf(m, " Plane%-8d%8u%8u%8u\n", plane + 1,
entry->start, entry->end,
skl_ddb_entry_size(entry));
}
drm/i915/skl: Simplify wm structures slightly (v2) A bunch of SKL watermark-related structures have the cursor plane as a separate entry from the rest of the planes. Since a previous patch updated I915_MAX_PLANES such that those plane arrays now have a slot for the cursor, update the code to use the new slot in the existing plane arrays and kill off the cursor-specific structures. There shouldn't be any functional change here; this is just shuffling around how the data is stored in some of the data structures. The whole patch is generated with Coccinelle via the following semantic patch: @@ struct skl_pipe_wm_parameters WMP; @@ - WMP.cursor + WMP.plane[PLANE_CURSOR] @@ struct skl_pipe_wm_parameters *WMP; @@ - WMP->cursor + WMP->plane[PLANE_CURSOR] @@ @@ struct skl_pipe_wm_parameters { ... - struct intel_plane_wm_parameters cursor; ... }; @@ struct skl_ddb_allocation DDB; expression E; @@ - DDB.cursor[E] + DDB.plane[E][PLANE_CURSOR] @@ struct skl_ddb_allocation *DDB; expression E; @@ - DDB->cursor[E] + DDB->plane[E][PLANE_CURSOR] @@ @@ struct skl_ddb_allocation { ... - struct skl_ddb_entry cursor[I915_MAX_PIPES]; ... }; @@ struct skl_wm_values WMV; expression E1, E2; @@ ( - WMV.cursor[E1][E2] + WMV.plane[E1][PLANE_CURSOR][E2] | - WMV.cursor_trans[E1] + WMV.plane_trans[E1][PLANE_CURSOR] ) @@ struct skl_wm_values *WMV; expression E1, E2; @@ ( - WMV->cursor[E1][E2] + WMV->plane[E1][PLANE_CURSOR][E2] | - WMV->cursor_trans[E1] + WMV->plane_trans[E1][PLANE_CURSOR] ) @@ @@ struct skl_wm_values { ... - uint32_t cursor[I915_MAX_PIPES][8]; ... - uint32_t cursor_trans[I915_MAX_PIPES]; ... }; @@ struct skl_wm_level WML; @@ ( - WML.cursor_en + WML.plane_en[PLANE_CURSOR] | - WML.cursor_res_b + WML.plane_res_b[PLANE_CURSOR] | - WML.cursor_res_l + WML.plane_res_l[PLANE_CURSOR] ) @@ struct skl_wm_level *WML; @@ ( - WML->cursor_en + WML->plane_en[PLANE_CURSOR] | - WML->cursor_res_b + WML->plane_res_b[PLANE_CURSOR] | - WML->cursor_res_l + WML->plane_res_l[PLANE_CURSOR] ) @@ @@ struct skl_wm_level { ... - bool cursor_en; ... - uint16_t cursor_res_b; - uint8_t cursor_res_l; ... }; v2: Use a PLANE_CURSOR enum entry rather than making the code reference I915_MAX_PLANES or I915_MAX_PLANES+1, which was confusing. (Ander) Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-09-25 05:53:10 +07:00
entry = &ddb->plane[pipe][PLANE_CURSOR];
seq_printf(m, " %-13s%8u%8u%8u\n", "Cursor", entry->start,
entry->end, skl_ddb_entry_size(entry));
}
drm_modeset_unlock_all(dev);
return 0;
}
static void drrs_status_per_crtc(struct seq_file *m,
struct drm_device *dev, struct intel_crtc *intel_crtc)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_drrs *drrs = &dev_priv->drrs;
int vrefresh = 0;
struct drm_connector *connector;
drm_for_each_connector(connector, dev) {
if (connector->state->crtc != &intel_crtc->base)
continue;
seq_printf(m, "%s:\n", connector->name);
}
if (dev_priv->vbt.drrs_type == STATIC_DRRS_SUPPORT)
seq_puts(m, "\tVBT: DRRS_type: Static");
else if (dev_priv->vbt.drrs_type == SEAMLESS_DRRS_SUPPORT)
seq_puts(m, "\tVBT: DRRS_type: Seamless");
else if (dev_priv->vbt.drrs_type == DRRS_NOT_SUPPORTED)
seq_puts(m, "\tVBT: DRRS_type: None");
else
seq_puts(m, "\tVBT: DRRS_type: FIXME: Unrecognized Value");
seq_puts(m, "\n\n");
if (to_intel_crtc_state(intel_crtc->base.state)->has_drrs) {
struct intel_panel *panel;
mutex_lock(&drrs->mutex);
/* DRRS Supported */
seq_puts(m, "\tDRRS Supported: Yes\n");
/* disable_drrs() will make drrs->dp NULL */
if (!drrs->dp) {
seq_puts(m, "Idleness DRRS: Disabled");
mutex_unlock(&drrs->mutex);
return;
}
panel = &drrs->dp->attached_connector->panel;
seq_printf(m, "\t\tBusy_frontbuffer_bits: 0x%X",
drrs->busy_frontbuffer_bits);
seq_puts(m, "\n\t\t");
if (drrs->refresh_rate_type == DRRS_HIGH_RR) {
seq_puts(m, "DRRS_State: DRRS_HIGH_RR\n");
vrefresh = panel->fixed_mode->vrefresh;
} else if (drrs->refresh_rate_type == DRRS_LOW_RR) {
seq_puts(m, "DRRS_State: DRRS_LOW_RR\n");
vrefresh = panel->downclock_mode->vrefresh;
} else {
seq_printf(m, "DRRS_State: Unknown(%d)\n",
drrs->refresh_rate_type);
mutex_unlock(&drrs->mutex);
return;
}
seq_printf(m, "\t\tVrefresh: %d", vrefresh);
seq_puts(m, "\n\t\t");
mutex_unlock(&drrs->mutex);
} else {
/* DRRS not supported. Print the VBT parameter*/
seq_puts(m, "\tDRRS Supported : No");
}
seq_puts(m, "\n");
}
static int i915_drrs_status(struct seq_file *m, void *unused)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct intel_crtc *intel_crtc;
int active_crtc_cnt = 0;
drm_modeset_lock_all(dev);
for_each_intel_crtc(dev, intel_crtc) {
if (intel_crtc->base.state->active) {
active_crtc_cnt++;
seq_printf(m, "\nCRTC %d: ", active_crtc_cnt);
drrs_status_per_crtc(m, dev, intel_crtc);
}
}
drm_modeset_unlock_all(dev);
if (!active_crtc_cnt)
seq_puts(m, "No active crtc found\n");
return 0;
}
struct pipe_crc_info {
const char *name;
struct drm_device *dev;
enum pipe pipe;
};
static int i915_dp_mst_info(struct seq_file *m, void *unused)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct intel_encoder *intel_encoder;
struct intel_digital_port *intel_dig_port;
struct drm_connector *connector;
drm_modeset_lock_all(dev);
drm_for_each_connector(connector, dev) {
if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort)
continue;
intel_encoder = intel_attached_encoder(connector);
if (!intel_encoder || intel_encoder->type == INTEL_OUTPUT_DP_MST)
continue;
intel_dig_port = enc_to_dig_port(&intel_encoder->base);
if (!intel_dig_port->dp.can_mst)
continue;
seq_printf(m, "MST Source Port %c\n",
port_name(intel_dig_port->port));
drm_dp_mst_dump_topology(m, &intel_dig_port->dp.mst_mgr);
}
drm_modeset_unlock_all(dev);
return 0;
}
static int i915_pipe_crc_open(struct inode *inode, struct file *filep)
{
struct pipe_crc_info *info = inode->i_private;
struct drm_i915_private *dev_priv = to_i915(info->dev);
struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[info->pipe];
if (info->pipe >= INTEL_INFO(info->dev)->num_pipes)
return -ENODEV;
spin_lock_irq(&pipe_crc->lock);
if (pipe_crc->opened) {
spin_unlock_irq(&pipe_crc->lock);
return -EBUSY; /* already open */
}
pipe_crc->opened = true;
filep->private_data = inode->i_private;
spin_unlock_irq(&pipe_crc->lock);
return 0;
}
static int i915_pipe_crc_release(struct inode *inode, struct file *filep)
{
struct pipe_crc_info *info = inode->i_private;
struct drm_i915_private *dev_priv = to_i915(info->dev);
struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[info->pipe];
spin_lock_irq(&pipe_crc->lock);
pipe_crc->opened = false;
spin_unlock_irq(&pipe_crc->lock);
return 0;
}
/* (6 fields, 8 chars each, space separated (5) + '\n') */
#define PIPE_CRC_LINE_LEN (6 * 8 + 5 + 1)
/* account for \'0' */
#define PIPE_CRC_BUFFER_LEN (PIPE_CRC_LINE_LEN + 1)
static int pipe_crc_data_count(struct intel_pipe_crc *pipe_crc)
{
assert_spin_locked(&pipe_crc->lock);
return CIRC_CNT(pipe_crc->head, pipe_crc->tail,
INTEL_PIPE_CRC_ENTRIES_NR);
}
static ssize_t
i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count,
loff_t *pos)
{
struct pipe_crc_info *info = filep->private_data;
struct drm_device *dev = info->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[info->pipe];
char buf[PIPE_CRC_BUFFER_LEN];
int n_entries;
ssize_t bytes_read;
/*
* Don't allow user space to provide buffers not big enough to hold
* a line of data.
*/
if (count < PIPE_CRC_LINE_LEN)
return -EINVAL;
if (pipe_crc->source == INTEL_PIPE_CRC_SOURCE_NONE)
return 0;
/* nothing to read */
spin_lock_irq(&pipe_crc->lock);
while (pipe_crc_data_count(pipe_crc) == 0) {
int ret;
if (filep->f_flags & O_NONBLOCK) {
spin_unlock_irq(&pipe_crc->lock);
return -EAGAIN;
}
ret = wait_event_interruptible_lock_irq(pipe_crc->wq,
pipe_crc_data_count(pipe_crc), pipe_crc->lock);
if (ret) {
spin_unlock_irq(&pipe_crc->lock);
return ret;
}
}
/* We now have one or more entries to read */
n_entries = count / PIPE_CRC_LINE_LEN;
bytes_read = 0;
while (n_entries > 0) {
struct intel_pipe_crc_entry *entry =
&pipe_crc->entries[pipe_crc->tail];
int ret;
if (CIRC_CNT(pipe_crc->head, pipe_crc->tail,
INTEL_PIPE_CRC_ENTRIES_NR) < 1)
break;
BUILD_BUG_ON_NOT_POWER_OF_2(INTEL_PIPE_CRC_ENTRIES_NR);
pipe_crc->tail = (pipe_crc->tail + 1) & (INTEL_PIPE_CRC_ENTRIES_NR - 1);
bytes_read += snprintf(buf, PIPE_CRC_BUFFER_LEN,
"%8u %8x %8x %8x %8x %8x\n",
entry->frame, entry->crc[0],
entry->crc[1], entry->crc[2],
entry->crc[3], entry->crc[4]);
spin_unlock_irq(&pipe_crc->lock);
ret = copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN);
if (ret == PIPE_CRC_LINE_LEN)
return -EFAULT;
user_buf += PIPE_CRC_LINE_LEN;
n_entries--;
spin_lock_irq(&pipe_crc->lock);
}
spin_unlock_irq(&pipe_crc->lock);
return bytes_read;
}
static const struct file_operations i915_pipe_crc_fops = {
.owner = THIS_MODULE,
.open = i915_pipe_crc_open,
.read = i915_pipe_crc_read,
.release = i915_pipe_crc_release,
};
static struct pipe_crc_info i915_pipe_crc_data[I915_MAX_PIPES] = {
{
.name = "i915_pipe_A_crc",
.pipe = PIPE_A,
},
{
.name = "i915_pipe_B_crc",
.pipe = PIPE_B,
},
{
.name = "i915_pipe_C_crc",
.pipe = PIPE_C,
},
};
static int i915_pipe_crc_create(struct dentry *root, struct drm_minor *minor,
enum pipe pipe)
{
struct drm_device *dev = minor->dev;
struct dentry *ent;
struct pipe_crc_info *info = &i915_pipe_crc_data[pipe];
info->dev = dev;
ent = debugfs_create_file(info->name, S_IRUGO, root, info,
&i915_pipe_crc_fops);
if (!ent)
return -ENOMEM;
return drm_add_fake_info_node(minor, ent, info);
}
static const char * const pipe_crc_sources[] = {
"none",
"plane1",
"plane2",
"pf",
"pipe",
"TV",
"DP-B",
"DP-C",
"DP-D",
"auto",
};
static const char *pipe_crc_source_name(enum intel_pipe_crc_source source)
{
BUILD_BUG_ON(ARRAY_SIZE(pipe_crc_sources) != INTEL_PIPE_CRC_SOURCE_MAX);
return pipe_crc_sources[source];
}
static int display_crc_ctl_show(struct seq_file *m, void *data)
{
struct drm_device *dev = m->private;
struct drm_i915_private *dev_priv = to_i915(dev);
int i;
for (i = 0; i < I915_MAX_PIPES; i++)
seq_printf(m, "%c %s\n", pipe_name(i),
pipe_crc_source_name(dev_priv->pipe_crc[i].source));
return 0;
}
static int display_crc_ctl_open(struct inode *inode, struct file *file)
{
struct drm_device *dev = inode->i_private;
return single_open(file, display_crc_ctl_show, dev);
}
static int i8xx_pipe_crc_ctl_reg(enum intel_pipe_crc_source *source,
uint32_t *val)
{
if (*source == INTEL_PIPE_CRC_SOURCE_AUTO)
*source = INTEL_PIPE_CRC_SOURCE_PIPE;
switch (*source) {
case INTEL_PIPE_CRC_SOURCE_PIPE:
*val = PIPE_CRC_ENABLE | PIPE_CRC_INCLUDE_BORDER_I8XX;
break;
case INTEL_PIPE_CRC_SOURCE_NONE:
*val = 0;
break;
default:
return -EINVAL;
}
return 0;
}
static int i9xx_pipe_crc_auto_source(struct drm_device *dev, enum pipe pipe,
enum intel_pipe_crc_source *source)
{
struct intel_encoder *encoder;
struct intel_crtc *crtc;
struct intel_digital_port *dig_port;
int ret = 0;
*source = INTEL_PIPE_CRC_SOURCE_PIPE;
drm: Split connection_mutex out of mode_config.mutex (v3) After the split-out of crtc locks from the big mode_config.mutex there's still two major areas it protects: - Various connector probe states, like connector->status, EDID properties, probed mode lists and similar information. - The links from connector->encoder and encoder->crtc and other modeset-relevant connector state (e.g. properties which control the panel fitter). The later is used by modeset operations. But they don't really care about the former since it's allowed to e.g. enable a disconnected VGA output or with a mode not in the probed list. Thus far this hasn't been a problem, but for the atomic modeset conversion Rob Clark needs to convert all modeset relevant locks into w/w locks. This is required because the order of acquisition is determined by how userspace supplies the atomic modeset data. This has run into troubles in the detect path since the i915 load detect code needs _both_ protections offered by the mode_config.mutex: It updates probe state and it needs to change the modeset configuration to enable the temporary load detect pipe. The big deal here is that for the probe/detect users of this lock a plain mutex fits best, but for atomic modesets we really want a w/w mutex. To fix this lets split out a new connection_mutex lock for the modeset relevant parts. For simplicity I've decided to only add one additional lock for all connector/encoder links and modeset configuration states. We have piles of different modeset objects in addition to those (like bridges or panels), so adding per-object locks would be much more effort. Also, we're guaranteed (at least for now) to do a full modeset if we need to acquire this lock. Which means that fine-grained locking is fairly irrelevant compared to the amount of time the full modeset will take. I've done a full audit, and there's just a few things that justify special focus: - Locking in drm_sysfs.c is almost completely absent. We should sprinkle mode_config.connection_mutex over this file a bit, but since it already lacks mode_config.mutex this patch wont make the situation any worse. This is material for a follow-up patch. - omap has a omap_framebuffer_flush function which walks the connector->encoder->crtc links and is called from many contexts. Some look like they don't acquire mode_config.mutex, so this is already racy. Again fixing this is material for a separate patch. - The radeon hot_plug function to retrain DP links looks at connector->dpms. Currently this happens without any locking, so is already racy. I think radeon_hotplug_work_func should gain mutex_lock/unlock calls for the mode_config.connection_mutex. - Same applies to i915's intel_dp_hot_plug. But again, this is already racy. - i915 load_detect code needs to acquire this lock. Which means the w/w dance due to Rob's work will be nicely contained to _just_ this function. I've added fixme comments everywhere where it looks suspicious but in the sysfs code. After a quick irc discussion with Dave Airlie it sounds like the lack of locking in there is due to sysfs cleanup fun at module unload. v1: original (only compile tested) v2: missing mutex_init(), etc (from Rob Clark) v3: i915 needs more care in the conversion: - Protect the edp pp logic with the connection_mutex. - Use connection_mutex in the backlight code due to get_pipe_from_connector. - Use drm_modeset_lock_all in suspend/resume paths. - Update lock checks in the overlay code. Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Rob Clark <robdclark@gmail.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Rob Clark <robdclark@gmail.com>
2014-05-30 04:54:47 +07:00
drm_modeset_lock_all(dev);
for_each_intel_encoder(dev, encoder) {
if (!encoder->base.crtc)
continue;
crtc = to_intel_crtc(encoder->base.crtc);
if (crtc->pipe != pipe)
continue;
switch (encoder->type) {
case INTEL_OUTPUT_TVOUT:
*source = INTEL_PIPE_CRC_SOURCE_TV;
break;
case INTEL_OUTPUT_DP:
case INTEL_OUTPUT_EDP:
dig_port = enc_to_dig_port(&encoder->base);
switch (dig_port->port) {
case PORT_B:
*source = INTEL_PIPE_CRC_SOURCE_DP_B;
break;
case PORT_C:
*source = INTEL_PIPE_CRC_SOURCE_DP_C;
break;
case PORT_D:
*source = INTEL_PIPE_CRC_SOURCE_DP_D;
break;
default:
WARN(1, "nonexisting DP port %c\n",
port_name(dig_port->port));
break;
}
break;
default:
break;
}
}
drm: Split connection_mutex out of mode_config.mutex (v3) After the split-out of crtc locks from the big mode_config.mutex there's still two major areas it protects: - Various connector probe states, like connector->status, EDID properties, probed mode lists and similar information. - The links from connector->encoder and encoder->crtc and other modeset-relevant connector state (e.g. properties which control the panel fitter). The later is used by modeset operations. But they don't really care about the former since it's allowed to e.g. enable a disconnected VGA output or with a mode not in the probed list. Thus far this hasn't been a problem, but for the atomic modeset conversion Rob Clark needs to convert all modeset relevant locks into w/w locks. This is required because the order of acquisition is determined by how userspace supplies the atomic modeset data. This has run into troubles in the detect path since the i915 load detect code needs _both_ protections offered by the mode_config.mutex: It updates probe state and it needs to change the modeset configuration to enable the temporary load detect pipe. The big deal here is that for the probe/detect users of this lock a plain mutex fits best, but for atomic modesets we really want a w/w mutex. To fix this lets split out a new connection_mutex lock for the modeset relevant parts. For simplicity I've decided to only add one additional lock for all connector/encoder links and modeset configuration states. We have piles of different modeset objects in addition to those (like bridges or panels), so adding per-object locks would be much more effort. Also, we're guaranteed (at least for now) to do a full modeset if we need to acquire this lock. Which means that fine-grained locking is fairly irrelevant compared to the amount of time the full modeset will take. I've done a full audit, and there's just a few things that justify special focus: - Locking in drm_sysfs.c is almost completely absent. We should sprinkle mode_config.connection_mutex over this file a bit, but since it already lacks mode_config.mutex this patch wont make the situation any worse. This is material for a follow-up patch. - omap has a omap_framebuffer_flush function which walks the connector->encoder->crtc links and is called from many contexts. Some look like they don't acquire mode_config.mutex, so this is already racy. Again fixing this is material for a separate patch. - The radeon hot_plug function to retrain DP links looks at connector->dpms. Currently this happens without any locking, so is already racy. I think radeon_hotplug_work_func should gain mutex_lock/unlock calls for the mode_config.connection_mutex. - Same applies to i915's intel_dp_hot_plug. But again, this is already racy. - i915 load_detect code needs to acquire this lock. Which means the w/w dance due to Rob's work will be nicely contained to _just_ this function. I've added fixme comments everywhere where it looks suspicious but in the sysfs code. After a quick irc discussion with Dave Airlie it sounds like the lack of locking in there is due to sysfs cleanup fun at module unload. v1: original (only compile tested) v2: missing mutex_init(), etc (from Rob Clark) v3: i915 needs more care in the conversion: - Protect the edp pp logic with the connection_mutex. - Use connection_mutex in the backlight code due to get_pipe_from_connector. - Use drm_modeset_lock_all in suspend/resume paths. - Update lock checks in the overlay code. Cc: Alex Deucher <alexdeucher@gmail.com> Cc: Rob Clark <robdclark@gmail.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Rob Clark <robdclark@gmail.com>
2014-05-30 04:54:47 +07:00
drm_modeset_unlock_all(dev);
return ret;
}
static int vlv_pipe_crc_ctl_reg(struct drm_device *dev,
enum pipe pipe,
enum intel_pipe_crc_source *source,
uint32_t *val)
{
struct drm_i915_private *dev_priv = to_i915(dev);
bool need_stable_symbols = false;
if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) {
int ret = i9xx_pipe_crc_auto_source(dev, pipe, source);
if (ret)
return ret;
}
switch (*source) {
case INTEL_PIPE_CRC_SOURCE_PIPE:
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_PIPE_VLV;
break;
case INTEL_PIPE_CRC_SOURCE_DP_B:
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_B_VLV;
need_stable_symbols = true;
break;
case INTEL_PIPE_CRC_SOURCE_DP_C:
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_C_VLV;
need_stable_symbols = true;
break;
case INTEL_PIPE_CRC_SOURCE_DP_D:
if (!IS_CHERRYVIEW(dev))
return -EINVAL;
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_D_VLV;
need_stable_symbols = true;
break;
case INTEL_PIPE_CRC_SOURCE_NONE:
*val = 0;
break;
default:
return -EINVAL;
}
/*
* When the pipe CRC tap point is after the transcoders we need
* to tweak symbol-level features to produce a deterministic series of
* symbols for a given frame. We need to reset those features only once
* a frame (instead of every nth symbol):
* - DC-balance: used to ensure a better clock recovery from the data
* link (SDVO)
* - DisplayPort scrambling: used for EMI reduction
*/
if (need_stable_symbols) {
uint32_t tmp = I915_READ(PORT_DFT2_G4X);
tmp |= DC_BALANCE_RESET_VLV;
switch (pipe) {
case PIPE_A:
tmp |= PIPE_A_SCRAMBLE_RESET;
break;
case PIPE_B:
tmp |= PIPE_B_SCRAMBLE_RESET;
break;
case PIPE_C:
tmp |= PIPE_C_SCRAMBLE_RESET;
break;
default:
return -EINVAL;
}
I915_WRITE(PORT_DFT2_G4X, tmp);
}
return 0;
}
static int i9xx_pipe_crc_ctl_reg(struct drm_device *dev,
enum pipe pipe,
enum intel_pipe_crc_source *source,
uint32_t *val)
{
struct drm_i915_private *dev_priv = to_i915(dev);
bool need_stable_symbols = false;
if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) {
int ret = i9xx_pipe_crc_auto_source(dev, pipe, source);
if (ret)
return ret;
}
switch (*source) {
case INTEL_PIPE_CRC_SOURCE_PIPE:
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_PIPE_I9XX;
break;
case INTEL_PIPE_CRC_SOURCE_TV:
if (!SUPPORTS_TV(dev))
return -EINVAL;
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_TV_PRE;
break;
case INTEL_PIPE_CRC_SOURCE_DP_B:
if (!IS_G4X(dev))
return -EINVAL;
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_B_G4X;
need_stable_symbols = true;
break;
case INTEL_PIPE_CRC_SOURCE_DP_C:
if (!IS_G4X(dev))
return -EINVAL;
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_C_G4X;
need_stable_symbols = true;
break;
case INTEL_PIPE_CRC_SOURCE_DP_D:
if (!IS_G4X(dev))
return -EINVAL;
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_DP_D_G4X;
need_stable_symbols = true;
break;
case INTEL_PIPE_CRC_SOURCE_NONE:
*val = 0;
break;
default:
return -EINVAL;
}
/*
* When the pipe CRC tap point is after the transcoders we need
* to tweak symbol-level features to produce a deterministic series of
* symbols for a given frame. We need to reset those features only once
* a frame (instead of every nth symbol):
* - DC-balance: used to ensure a better clock recovery from the data
* link (SDVO)
* - DisplayPort scrambling: used for EMI reduction
*/
if (need_stable_symbols) {
uint32_t tmp = I915_READ(PORT_DFT2_G4X);
WARN_ON(!IS_G4X(dev));
I915_WRITE(PORT_DFT_I9XX,
I915_READ(PORT_DFT_I9XX) | DC_BALANCE_RESET);
if (pipe == PIPE_A)
tmp |= PIPE_A_SCRAMBLE_RESET;
else
tmp |= PIPE_B_SCRAMBLE_RESET;
I915_WRITE(PORT_DFT2_G4X, tmp);
}
return 0;
}
static void vlv_undo_pipe_scramble_reset(struct drm_device *dev,
enum pipe pipe)
{
struct drm_i915_private *dev_priv = to_i915(dev);
uint32_t tmp = I915_READ(PORT_DFT2_G4X);
switch (pipe) {
case PIPE_A:
tmp &= ~PIPE_A_SCRAMBLE_RESET;
break;
case PIPE_B:
tmp &= ~PIPE_B_SCRAMBLE_RESET;
break;
case PIPE_C:
tmp &= ~PIPE_C_SCRAMBLE_RESET;
break;
default:
return;
}
if (!(tmp & PIPE_SCRAMBLE_RESET_MASK))
tmp &= ~DC_BALANCE_RESET_VLV;
I915_WRITE(PORT_DFT2_G4X, tmp);
}
static void g4x_undo_pipe_scramble_reset(struct drm_device *dev,
enum pipe pipe)
{
struct drm_i915_private *dev_priv = to_i915(dev);
uint32_t tmp = I915_READ(PORT_DFT2_G4X);
if (pipe == PIPE_A)
tmp &= ~PIPE_A_SCRAMBLE_RESET;
else
tmp &= ~PIPE_B_SCRAMBLE_RESET;
I915_WRITE(PORT_DFT2_G4X, tmp);
if (!(tmp & PIPE_SCRAMBLE_RESET_MASK)) {
I915_WRITE(PORT_DFT_I9XX,
I915_READ(PORT_DFT_I9XX) & ~DC_BALANCE_RESET);
}
}
static int ilk_pipe_crc_ctl_reg(enum intel_pipe_crc_source *source,
uint32_t *val)
{
if (*source == INTEL_PIPE_CRC_SOURCE_AUTO)
*source = INTEL_PIPE_CRC_SOURCE_PIPE;
switch (*source) {
case INTEL_PIPE_CRC_SOURCE_PLANE1:
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_PRIMARY_ILK;
break;
case INTEL_PIPE_CRC_SOURCE_PLANE2:
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_SPRITE_ILK;
break;
case INTEL_PIPE_CRC_SOURCE_PIPE:
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_PIPE_ILK;
break;
case INTEL_PIPE_CRC_SOURCE_NONE:
*val = 0;
break;
default:
return -EINVAL;
}
return 0;
}
static void hsw_trans_edp_pipe_A_crc_wa(struct drm_device *dev, bool enable)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_crtc *crtc =
to_intel_crtc(dev_priv->pipe_to_crtc_mapping[PIPE_A]);
struct intel_crtc_state *pipe_config;
struct drm_atomic_state *state;
int ret = 0;
drm_modeset_lock_all(dev);
state = drm_atomic_state_alloc(dev);
if (!state) {
ret = -ENOMEM;
goto out;
}
state->acquire_ctx = drm_modeset_legacy_acquire_ctx(&crtc->base);
pipe_config = intel_atomic_get_crtc_state(state, crtc);
if (IS_ERR(pipe_config)) {
ret = PTR_ERR(pipe_config);
goto out;
}
pipe_config->pch_pfit.force_thru = enable;
if (pipe_config->cpu_transcoder == TRANSCODER_EDP &&
pipe_config->pch_pfit.enabled != enable)
pipe_config->base.connectors_changed = true;
ret = drm_atomic_commit(state);
out:
drm_modeset_unlock_all(dev);
WARN(ret, "Toggling workaround to %i returns %i\n", enable, ret);
if (ret)
drm_atomic_state_free(state);
}
static int ivb_pipe_crc_ctl_reg(struct drm_device *dev,
enum pipe pipe,
enum intel_pipe_crc_source *source,
uint32_t *val)
{
if (*source == INTEL_PIPE_CRC_SOURCE_AUTO)
*source = INTEL_PIPE_CRC_SOURCE_PF;
switch (*source) {
case INTEL_PIPE_CRC_SOURCE_PLANE1:
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_PRIMARY_IVB;
break;
case INTEL_PIPE_CRC_SOURCE_PLANE2:
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_SPRITE_IVB;
break;
case INTEL_PIPE_CRC_SOURCE_PF:
if (IS_HASWELL(dev) && pipe == PIPE_A)
hsw_trans_edp_pipe_A_crc_wa(dev, true);
*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_PF_IVB;
break;
case INTEL_PIPE_CRC_SOURCE_NONE:
*val = 0;
break;
default:
return -EINVAL;
}
return 0;
}
static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe,
enum intel_pipe_crc_source source)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe];
struct intel_crtc *crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev,
pipe));
enum intel_display_power_domain power_domain;
u32 val = 0; /* shut up gcc */
int ret;
if (pipe_crc->source == source)
return 0;
/* forbid changing the source without going back to 'none' */
if (pipe_crc->source && source)
return -EINVAL;
power_domain = POWER_DOMAIN_PIPE(pipe);
if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) {
DRM_DEBUG_KMS("Trying to capture CRC while pipe is off\n");
return -EIO;
}
if (IS_GEN2(dev))
ret = i8xx_pipe_crc_ctl_reg(&source, &val);
else if (INTEL_INFO(dev)->gen < 5)
ret = i9xx_pipe_crc_ctl_reg(dev, pipe, &source, &val);
else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
ret = vlv_pipe_crc_ctl_reg(dev, pipe, &source, &val);
else if (IS_GEN5(dev) || IS_GEN6(dev))
ret = ilk_pipe_crc_ctl_reg(&source, &val);
else
ret = ivb_pipe_crc_ctl_reg(dev, pipe, &source, &val);
if (ret != 0)
goto out;
/* none -> real source transition */
if (source) {
struct intel_pipe_crc_entry *entries;
DRM_DEBUG_DRIVER("collecting CRCs for pipe %c, %s\n",
pipe_name(pipe), pipe_crc_source_name(source));
entries = kcalloc(INTEL_PIPE_CRC_ENTRIES_NR,
sizeof(pipe_crc->entries[0]),
GFP_KERNEL);
if (!entries) {
ret = -ENOMEM;
goto out;
}
/*
* When IPS gets enabled, the pipe CRC changes. Since IPS gets
* enabled and disabled dynamically based on package C states,
* user space can't make reliable use of the CRCs, so let's just
* completely disable it.
*/
hsw_disable_ips(crtc);
spin_lock_irq(&pipe_crc->lock);
kfree(pipe_crc->entries);
pipe_crc->entries = entries;
pipe_crc->head = 0;
pipe_crc->tail = 0;
spin_unlock_irq(&pipe_crc->lock);
}
pipe_crc->source = source;
I915_WRITE(PIPE_CRC_CTL(pipe), val);
POSTING_READ(PIPE_CRC_CTL(pipe));
/* real source -> none transition */
if (source == INTEL_PIPE_CRC_SOURCE_NONE) {
struct intel_pipe_crc_entry *entries;
struct intel_crtc *crtc =
to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]);
DRM_DEBUG_DRIVER("stopping CRCs for pipe %c\n",
pipe_name(pipe));
drm_modeset_lock(&crtc->base.mutex, NULL);
if (crtc->base.state->active)
intel_wait_for_vblank(dev, pipe);
drm_modeset_unlock(&crtc->base.mutex);
spin_lock_irq(&pipe_crc->lock);
entries = pipe_crc->entries;
pipe_crc->entries = NULL;
pipe_crc->head = 0;
pipe_crc->tail = 0;
spin_unlock_irq(&pipe_crc->lock);
kfree(entries);
if (IS_G4X(dev))
g4x_undo_pipe_scramble_reset(dev, pipe);
else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
vlv_undo_pipe_scramble_reset(dev, pipe);
else if (IS_HASWELL(dev) && pipe == PIPE_A)
hsw_trans_edp_pipe_A_crc_wa(dev, false);
hsw_enable_ips(crtc);
}
ret = 0;
out:
intel_display_power_put(dev_priv, power_domain);
return ret;
}
/*
* Parse pipe CRC command strings:
* command: wsp* object wsp+ name wsp+ source wsp*
* object: 'pipe'
* name: (A | B | C)
* source: (none | plane1 | plane2 | pf)
* wsp: (#0x20 | #0x9 | #0xA)+
*
* eg.:
* "pipe A plane1" -> Start CRC computations on plane1 of pipe A
* "pipe A none" -> Stop CRC
*/
static int display_crc_ctl_tokenize(char *buf, char *words[], int max_words)
{
int n_words = 0;
while (*buf) {
char *end;
/* skip leading white space */
buf = skip_spaces(buf);
if (!*buf)
break; /* end of buffer */
/* find end of word */
for (end = buf; *end && !isspace(*end); end++)
;
if (n_words == max_words) {
DRM_DEBUG_DRIVER("too many words, allowed <= %d\n",
max_words);
return -EINVAL; /* ran out of words[] before bytes */
}
if (*end)
*end++ = '\0';
words[n_words++] = buf;
buf = end;
}
return n_words;
}
enum intel_pipe_crc_object {
PIPE_CRC_OBJECT_PIPE,
};
static const char * const pipe_crc_objects[] = {
"pipe",
};
static int
display_crc_ctl_parse_object(const char *buf, enum intel_pipe_crc_object *o)
{
int i;
for (i = 0; i < ARRAY_SIZE(pipe_crc_objects); i++)
if (!strcmp(buf, pipe_crc_objects[i])) {
*o = i;
return 0;
}
return -EINVAL;
}
static int display_crc_ctl_parse_pipe(const char *buf, enum pipe *pipe)
{
const char name = buf[0];
if (name < 'A' || name >= pipe_name(I915_MAX_PIPES))
return -EINVAL;
*pipe = name - 'A';
return 0;
}
static int
display_crc_ctl_parse_source(const char *buf, enum intel_pipe_crc_source *s)
{
int i;
for (i = 0; i < ARRAY_SIZE(pipe_crc_sources); i++)
if (!strcmp(buf, pipe_crc_sources[i])) {
*s = i;
return 0;
}
return -EINVAL;
}
static int display_crc_ctl_parse(struct drm_device *dev, char *buf, size_t len)
{
#define N_WORDS 3
int n_words;
char *words[N_WORDS];
enum pipe pipe;
enum intel_pipe_crc_object object;
enum intel_pipe_crc_source source;
n_words = display_crc_ctl_tokenize(buf, words, N_WORDS);
if (n_words != N_WORDS) {
DRM_DEBUG_DRIVER("tokenize failed, a command is %d words\n",
N_WORDS);
return -EINVAL;
}
if (display_crc_ctl_parse_object(words[0], &object) < 0) {
DRM_DEBUG_DRIVER("unknown object %s\n", words[0]);
return -EINVAL;
}
if (display_crc_ctl_parse_pipe(words[1], &pipe) < 0) {
DRM_DEBUG_DRIVER("unknown pipe %s\n", words[1]);
return -EINVAL;
}
if (display_crc_ctl_parse_source(words[2], &source) < 0) {
DRM_DEBUG_DRIVER("unknown source %s\n", words[2]);
return -EINVAL;
}
return pipe_crc_set_source(dev, pipe, source);
}
static ssize_t display_crc_ctl_write(struct file *file, const char __user *ubuf,
size_t len, loff_t *offp)
{
struct seq_file *m = file->private_data;
struct drm_device *dev = m->private;
char *tmpbuf;
int ret;
if (len == 0)
return 0;
if (len > PAGE_SIZE - 1) {
DRM_DEBUG_DRIVER("expected <%lu bytes into pipe crc control\n",
PAGE_SIZE);
return -E2BIG;
}
tmpbuf = kmalloc(len + 1, GFP_KERNEL);
if (!tmpbuf)
return -ENOMEM;
if (copy_from_user(tmpbuf, ubuf, len)) {
ret = -EFAULT;
goto out;
}
tmpbuf[len] = '\0';
ret = display_crc_ctl_parse(dev, tmpbuf, len);
out:
kfree(tmpbuf);
if (ret < 0)
return ret;
*offp += len;
return len;
}
static const struct file_operations i915_display_crc_ctl_fops = {
.owner = THIS_MODULE,
.open = display_crc_ctl_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = display_crc_ctl_write
};
static ssize_t i915_displayport_test_active_write(struct file *file,
const char __user *ubuf,
size_t len, loff_t *offp)
{
char *input_buffer;
int status = 0;
struct drm_device *dev;
struct drm_connector *connector;
struct list_head *connector_list;
struct intel_dp *intel_dp;
int val = 0;
dev = ((struct seq_file *)file->private_data)->private;
connector_list = &dev->mode_config.connector_list;
if (len == 0)
return 0;
input_buffer = kmalloc(len + 1, GFP_KERNEL);
if (!input_buffer)
return -ENOMEM;
if (copy_from_user(input_buffer, ubuf, len)) {
status = -EFAULT;
goto out;
}
input_buffer[len] = '\0';
DRM_DEBUG_DRIVER("Copied %d bytes from user\n", (unsigned int)len);
list_for_each_entry(connector, connector_list, head) {
if (connector->connector_type !=
DRM_MODE_CONNECTOR_DisplayPort)
continue;
if (connector->status == connector_status_connected &&
connector->encoder != NULL) {
intel_dp = enc_to_intel_dp(connector->encoder);
status = kstrtoint(input_buffer, 10, &val);
if (status < 0)
goto out;
DRM_DEBUG_DRIVER("Got %d for test active\n", val);
/* To prevent erroneous activation of the compliance
* testing code, only accept an actual value of 1 here
*/
if (val == 1)
intel_dp->compliance_test_active = 1;
else
intel_dp->compliance_test_active = 0;
}
}
out:
kfree(input_buffer);
if (status < 0)
return status;
*offp += len;
return len;
}
static int i915_displayport_test_active_show(struct seq_file *m, void *data)
{
struct drm_device *dev = m->private;
struct drm_connector *connector;
struct list_head *connector_list = &dev->mode_config.connector_list;
struct intel_dp *intel_dp;
list_for_each_entry(connector, connector_list, head) {
if (connector->connector_type !=
DRM_MODE_CONNECTOR_DisplayPort)
continue;
if (connector->status == connector_status_connected &&
connector->encoder != NULL) {
intel_dp = enc_to_intel_dp(connector->encoder);
if (intel_dp->compliance_test_active)
seq_puts(m, "1");
else
seq_puts(m, "0");
} else
seq_puts(m, "0");
}
return 0;
}
static int i915_displayport_test_active_open(struct inode *inode,
struct file *file)
{
struct drm_device *dev = inode->i_private;
return single_open(file, i915_displayport_test_active_show, dev);
}
static const struct file_operations i915_displayport_test_active_fops = {
.owner = THIS_MODULE,
.open = i915_displayport_test_active_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = i915_displayport_test_active_write
};
static int i915_displayport_test_data_show(struct seq_file *m, void *data)
{
struct drm_device *dev = m->private;
struct drm_connector *connector;
struct list_head *connector_list = &dev->mode_config.connector_list;
struct intel_dp *intel_dp;
list_for_each_entry(connector, connector_list, head) {
if (connector->connector_type !=
DRM_MODE_CONNECTOR_DisplayPort)
continue;
if (connector->status == connector_status_connected &&
connector->encoder != NULL) {
intel_dp = enc_to_intel_dp(connector->encoder);
seq_printf(m, "%lx", intel_dp->compliance_test_data);
} else
seq_puts(m, "0");
}
return 0;
}
static int i915_displayport_test_data_open(struct inode *inode,
struct file *file)
{
struct drm_device *dev = inode->i_private;
return single_open(file, i915_displayport_test_data_show, dev);
}
static const struct file_operations i915_displayport_test_data_fops = {
.owner = THIS_MODULE,
.open = i915_displayport_test_data_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release
};
static int i915_displayport_test_type_show(struct seq_file *m, void *data)
{
struct drm_device *dev = m->private;
struct drm_connector *connector;
struct list_head *connector_list = &dev->mode_config.connector_list;
struct intel_dp *intel_dp;
list_for_each_entry(connector, connector_list, head) {
if (connector->connector_type !=
DRM_MODE_CONNECTOR_DisplayPort)
continue;
if (connector->status == connector_status_connected &&
connector->encoder != NULL) {
intel_dp = enc_to_intel_dp(connector->encoder);
seq_printf(m, "%02lx", intel_dp->compliance_test_type);
} else
seq_puts(m, "0");
}
return 0;
}
static int i915_displayport_test_type_open(struct inode *inode,
struct file *file)
{
struct drm_device *dev = inode->i_private;
return single_open(file, i915_displayport_test_type_show, dev);
}
static const struct file_operations i915_displayport_test_type_fops = {
.owner = THIS_MODULE,
.open = i915_displayport_test_type_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release
};
static void wm_latency_show(struct seq_file *m, const uint16_t wm[8])
{
struct drm_device *dev = m->private;
int level;
int num_levels;
if (IS_CHERRYVIEW(dev))
num_levels = 3;
else if (IS_VALLEYVIEW(dev))
num_levels = 1;
else
num_levels = ilk_wm_max_level(dev) + 1;
drm_modeset_lock_all(dev);
for (level = 0; level < num_levels; level++) {
unsigned int latency = wm[level];
/*
* - WM1+ latency values in 0.5us units
* - latencies are in us on gen9/vlv/chv
*/
if (INTEL_INFO(dev)->gen >= 9 || IS_VALLEYVIEW(dev) ||
IS_CHERRYVIEW(dev))
latency *= 10;
else if (level > 0)
latency *= 5;
seq_printf(m, "WM%d %u (%u.%u usec)\n",
level, wm[level], latency / 10, latency % 10);
}
drm_modeset_unlock_all(dev);
}
static int pri_wm_latency_show(struct seq_file *m, void *data)
{
struct drm_device *dev = m->private;
struct drm_i915_private *dev_priv = to_i915(dev);
const uint16_t *latencies;
if (INTEL_INFO(dev)->gen >= 9)
latencies = dev_priv->wm.skl_latency;
else
latencies = to_i915(dev)->wm.pri_latency;
wm_latency_show(m, latencies);
return 0;
}
static int spr_wm_latency_show(struct seq_file *m, void *data)
{
struct drm_device *dev = m->private;
struct drm_i915_private *dev_priv = to_i915(dev);
const uint16_t *latencies;
if (INTEL_INFO(dev)->gen >= 9)
latencies = dev_priv->wm.skl_latency;
else
latencies = to_i915(dev)->wm.spr_latency;
wm_latency_show(m, latencies);
return 0;
}
static int cur_wm_latency_show(struct seq_file *m, void *data)
{
struct drm_device *dev = m->private;
struct drm_i915_private *dev_priv = to_i915(dev);
const uint16_t *latencies;
if (INTEL_INFO(dev)->gen >= 9)
latencies = dev_priv->wm.skl_latency;
else
latencies = to_i915(dev)->wm.cur_latency;
wm_latency_show(m, latencies);
return 0;
}
static int pri_wm_latency_open(struct inode *inode, struct file *file)
{
struct drm_device *dev = inode->i_private;
if (INTEL_INFO(dev)->gen < 5)
return -ENODEV;
return single_open(file, pri_wm_latency_show, dev);
}
static int spr_wm_latency_open(struct inode *inode, struct file *file)
{
struct drm_device *dev = inode->i_private;
if (HAS_GMCH_DISPLAY(dev))
return -ENODEV;
return single_open(file, spr_wm_latency_show, dev);
}
static int cur_wm_latency_open(struct inode *inode, struct file *file)
{
struct drm_device *dev = inode->i_private;
if (HAS_GMCH_DISPLAY(dev))
return -ENODEV;
return single_open(file, cur_wm_latency_show, dev);
}
static ssize_t wm_latency_write(struct file *file, const char __user *ubuf,
size_t len, loff_t *offp, uint16_t wm[8])
{
struct seq_file *m = file->private_data;
struct drm_device *dev = m->private;
uint16_t new[8] = { 0 };
int num_levels;
int level;
int ret;
char tmp[32];
if (IS_CHERRYVIEW(dev))
num_levels = 3;
else if (IS_VALLEYVIEW(dev))
num_levels = 1;
else
num_levels = ilk_wm_max_level(dev) + 1;
if (len >= sizeof(tmp))
return -EINVAL;
if (copy_from_user(tmp, ubuf, len))
return -EFAULT;
tmp[len] = '\0';
ret = sscanf(tmp, "%hu %hu %hu %hu %hu %hu %hu %hu",
&new[0], &new[1], &new[2], &new[3],
&new[4], &new[5], &new[6], &new[7]);
if (ret != num_levels)
return -EINVAL;
drm_modeset_lock_all(dev);
for (level = 0; level < num_levels; level++)
wm[level] = new[level];
drm_modeset_unlock_all(dev);
return len;
}
static ssize_t pri_wm_latency_write(struct file *file, const char __user *ubuf,
size_t len, loff_t *offp)
{
struct seq_file *m = file->private_data;
struct drm_device *dev = m->private;
struct drm_i915_private *dev_priv = to_i915(dev);
uint16_t *latencies;
if (INTEL_INFO(dev)->gen >= 9)
latencies = dev_priv->wm.skl_latency;
else
latencies = to_i915(dev)->wm.pri_latency;
return wm_latency_write(file, ubuf, len, offp, latencies);
}
static ssize_t spr_wm_latency_write(struct file *file, const char __user *ubuf,
size_t len, loff_t *offp)
{
struct seq_file *m = file->private_data;
struct drm_device *dev = m->private;
struct drm_i915_private *dev_priv = to_i915(dev);
uint16_t *latencies;
if (INTEL_INFO(dev)->gen >= 9)
latencies = dev_priv->wm.skl_latency;
else
latencies = to_i915(dev)->wm.spr_latency;
return wm_latency_write(file, ubuf, len, offp, latencies);
}
static ssize_t cur_wm_latency_write(struct file *file, const char __user *ubuf,
size_t len, loff_t *offp)
{
struct seq_file *m = file->private_data;
struct drm_device *dev = m->private;
struct drm_i915_private *dev_priv = to_i915(dev);
uint16_t *latencies;
if (INTEL_INFO(dev)->gen >= 9)
latencies = dev_priv->wm.skl_latency;
else
latencies = to_i915(dev)->wm.cur_latency;
return wm_latency_write(file, ubuf, len, offp, latencies);
}
static const struct file_operations i915_pri_wm_latency_fops = {
.owner = THIS_MODULE,
.open = pri_wm_latency_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = pri_wm_latency_write
};
static const struct file_operations i915_spr_wm_latency_fops = {
.owner = THIS_MODULE,
.open = spr_wm_latency_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = spr_wm_latency_write
};
static const struct file_operations i915_cur_wm_latency_fops = {
.owner = THIS_MODULE,
.open = cur_wm_latency_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
.write = cur_wm_latency_write
};
static int
i915_wedged_get(void *data, u64 *val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
*val = i915_terminally_wedged(&dev_priv->gpu_error);
return 0;
}
static int
i915_wedged_set(void *data, u64 val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
/*
* There is no safeguard against this debugfs entry colliding
* with the hangcheck calling same i915_handle_error() in
* parallel, causing an explosion. For now we assume that the
* test harness is responsible enough not to inject gpu hangs
* while it is writing to 'i915_wedged'
*/
if (i915_reset_in_progress(&dev_priv->gpu_error))
return -EAGAIN;
intel_runtime_pm_get(dev_priv);
i915_handle_error(dev_priv, val,
"Manually setting wedged to %llu", val);
intel_runtime_pm_put(dev_priv);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
i915_wedged_get, i915_wedged_set,
"%llu\n");
static int
i915_ring_missed_irq_get(void *data, u64 *val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
*val = dev_priv->gpu_error.missed_irq_rings;
return 0;
}
static int
i915_ring_missed_irq_set(void *data, u64 val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
int ret;
/* Lock against concurrent debugfs callers */
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
dev_priv->gpu_error.missed_irq_rings = val;
mutex_unlock(&dev->struct_mutex);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops,
i915_ring_missed_irq_get, i915_ring_missed_irq_set,
"0x%08llx\n");
static int
i915_ring_test_irq_get(void *data, u64 *val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
*val = dev_priv->gpu_error.test_irq_rings;
return 0;
}
static int
i915_ring_test_irq_set(void *data, u64 val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
val &= INTEL_INFO(dev_priv)->ring_mask;
DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val);
dev_priv->gpu_error.test_irq_rings = val;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
i915_ring_test_irq_get, i915_ring_test_irq_set,
"0x%08llx\n");
#define DROP_UNBOUND 0x1
#define DROP_BOUND 0x2
#define DROP_RETIRE 0x4
#define DROP_ACTIVE 0x8
#define DROP_ALL (DROP_UNBOUND | \
DROP_BOUND | \
DROP_RETIRE | \
DROP_ACTIVE)
static int
i915_drop_caches_get(void *data, u64 *val)
{
*val = DROP_ALL;
return 0;
}
static int
i915_drop_caches_set(void *data, u64 val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
int ret;
DRM_DEBUG("Dropping caches: 0x%08llx\n", val);
/* No need to check and wait for gpu resets, only libdrm auto-restarts
* on ioctls on -EAGAIN. */
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
if (val & DROP_ACTIVE) {
ret = i915_gem_wait_for_idle(dev_priv);
if (ret)
goto unlock;
}
if (val & (DROP_RETIRE | DROP_ACTIVE))
i915_gem_retire_requests(dev_priv);
if (val & DROP_BOUND)
i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND);
if (val & DROP_UNBOUND)
i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_UNBOUND);
unlock:
mutex_unlock(&dev->struct_mutex);
return ret;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
i915_drop_caches_get, i915_drop_caches_set,
"0x%08llx\n");
static int
i915_max_freq_get(void *data, u64 *val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
if (INTEL_INFO(dev)->gen < 6)
return -ENODEV;
2015-01-24 02:04:26 +07:00
*val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit);
return 0;
}
static int
i915_max_freq_set(void *data, u64 val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
u32 hw_max, hw_min;
int ret;
if (INTEL_INFO(dev)->gen < 6)
return -ENODEV;
DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val);
ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
if (ret)
return ret;
/*
* Turbo will still be enabled, but won't go above the set value.
*/
val = intel_freq_opcode(dev_priv, val);
hw_max = dev_priv->rps.max_freq;
hw_min = dev_priv->rps.min_freq;
if (val < hw_min || val > hw_max || val < dev_priv->rps.min_freq_softlimit) {
mutex_unlock(&dev_priv->rps.hw_lock);
return -EINVAL;
}
dev_priv->rps.max_freq_softlimit = val;
intel_set_rps(dev_priv, val);
mutex_unlock(&dev_priv->rps.hw_lock);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops,
i915_max_freq_get, i915_max_freq_set,
"%llu\n");
static int
i915_min_freq_get(void *data, u64 *val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
if (INTEL_GEN(dev_priv) < 6)
return -ENODEV;
2015-01-24 02:04:26 +07:00
*val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit);
return 0;
}
static int
i915_min_freq_set(void *data, u64 val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
u32 hw_max, hw_min;
int ret;
if (INTEL_GEN(dev_priv) < 6)
return -ENODEV;
DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val);
ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
if (ret)
return ret;
/*
* Turbo will still be enabled, but won't go below the set value.
*/
val = intel_freq_opcode(dev_priv, val);
hw_max = dev_priv->rps.max_freq;
hw_min = dev_priv->rps.min_freq;
if (val < hw_min || val > hw_max || val > dev_priv->rps.max_freq_softlimit) {
mutex_unlock(&dev_priv->rps.hw_lock);
return -EINVAL;
}
dev_priv->rps.min_freq_softlimit = val;
intel_set_rps(dev_priv, val);
mutex_unlock(&dev_priv->rps.hw_lock);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops,
i915_min_freq_get, i915_min_freq_set,
"%llu\n");
static int
i915_cache_sharing_get(void *data, u64 *val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
u32 snpcr;
int ret;
if (!(IS_GEN6(dev) || IS_GEN7(dev)))
return -ENODEV;
ret = mutex_lock_interruptible(&dev->struct_mutex);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
return 0;
}
static int
i915_cache_sharing_set(void *data, u64 val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
u32 snpcr;
if (!(IS_GEN6(dev) || IS_GEN7(dev)))
return -ENODEV;
if (val > 3)
return -EINVAL;
intel_runtime_pm_get(dev_priv);
DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val);
/* Update the cache sharing policy here as well */
snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
snpcr &= ~GEN6_MBC_SNPCR_MASK;
snpcr |= (val << GEN6_MBC_SNPCR_SHIFT);
I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
intel_runtime_pm_put(dev_priv);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(i915_cache_sharing_fops,
i915_cache_sharing_get, i915_cache_sharing_set,
"%llu\n");
struct sseu_dev_status {
unsigned int slice_total;
unsigned int subslice_total;
unsigned int subslice_per_slice;
unsigned int eu_total;
unsigned int eu_per_subslice;
};
static void cherryview_sseu_device_status(struct drm_device *dev,
struct sseu_dev_status *stat)
{
struct drm_i915_private *dev_priv = to_i915(dev);
int ss_max = 2;
int ss;
u32 sig1[ss_max], sig2[ss_max];
sig1[0] = I915_READ(CHV_POWER_SS0_SIG1);
sig1[1] = I915_READ(CHV_POWER_SS1_SIG1);
sig2[0] = I915_READ(CHV_POWER_SS0_SIG2);
sig2[1] = I915_READ(CHV_POWER_SS1_SIG2);
for (ss = 0; ss < ss_max; ss++) {
unsigned int eu_cnt;
if (sig1[ss] & CHV_SS_PG_ENABLE)
/* skip disabled subslice */
continue;
stat->slice_total = 1;
stat->subslice_per_slice++;
eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
((sig2[ss] & CHV_EU311_PG_ENABLE) ? 0 : 2);
stat->eu_total += eu_cnt;
stat->eu_per_subslice = max(stat->eu_per_subslice, eu_cnt);
}
stat->subslice_total = stat->subslice_per_slice;
}
static void gen9_sseu_device_status(struct drm_device *dev,
struct sseu_dev_status *stat)
{
struct drm_i915_private *dev_priv = to_i915(dev);
int s_max = 3, ss_max = 4;
int s, ss;
u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2];
/* BXT has a single slice and at most 3 subslices. */
if (IS_BROXTON(dev)) {
s_max = 1;
ss_max = 3;
}
for (s = 0; s < s_max; s++) {
s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s));
eu_reg[2*s] = I915_READ(GEN9_SS01_EU_PGCTL_ACK(s));
eu_reg[2*s + 1] = I915_READ(GEN9_SS23_EU_PGCTL_ACK(s));
}
eu_mask[0] = GEN9_PGCTL_SSA_EU08_ACK |
GEN9_PGCTL_SSA_EU19_ACK |
GEN9_PGCTL_SSA_EU210_ACK |
GEN9_PGCTL_SSA_EU311_ACK;
eu_mask[1] = GEN9_PGCTL_SSB_EU08_ACK |
GEN9_PGCTL_SSB_EU19_ACK |
GEN9_PGCTL_SSB_EU210_ACK |
GEN9_PGCTL_SSB_EU311_ACK;
for (s = 0; s < s_max; s++) {
unsigned int ss_cnt = 0;
if ((s_reg[s] & GEN9_PGCTL_SLICE_ACK) == 0)
/* skip disabled slice */
continue;
stat->slice_total++;
if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
ss_cnt = INTEL_INFO(dev)->subslice_per_slice;
for (ss = 0; ss < ss_max; ss++) {
unsigned int eu_cnt;
if (IS_BROXTON(dev) &&
!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
/* skip disabled subslice */
continue;
if (IS_BROXTON(dev))
ss_cnt++;
eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
eu_mask[ss%2]);
stat->eu_total += eu_cnt;
stat->eu_per_subslice = max(stat->eu_per_subslice,
eu_cnt);
}
stat->subslice_total += ss_cnt;
stat->subslice_per_slice = max(stat->subslice_per_slice,
ss_cnt);
}
}
static void broadwell_sseu_device_status(struct drm_device *dev,
struct sseu_dev_status *stat)
{
struct drm_i915_private *dev_priv = to_i915(dev);
int s;
u32 slice_info = I915_READ(GEN8_GT_SLICE_INFO);
stat->slice_total = hweight32(slice_info & GEN8_LSLICESTAT_MASK);
if (stat->slice_total) {
stat->subslice_per_slice = INTEL_INFO(dev)->subslice_per_slice;
stat->subslice_total = stat->slice_total *
stat->subslice_per_slice;
stat->eu_per_subslice = INTEL_INFO(dev)->eu_per_subslice;
stat->eu_total = stat->eu_per_subslice * stat->subslice_total;
/* subtract fused off EU(s) from enabled slice(s) */
for (s = 0; s < stat->slice_total; s++) {
u8 subslice_7eu = INTEL_INFO(dev)->subslice_7eu[s];
stat->eu_total -= hweight8(subslice_7eu);
}
}
}
static int i915_sseu_status(struct seq_file *m, void *unused)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
struct drm_device *dev = &dev_priv->drm;
struct sseu_dev_status stat;
if (INTEL_INFO(dev)->gen < 8)
return -ENODEV;
seq_puts(m, "SSEU Device Info\n");
seq_printf(m, " Available Slice Total: %u\n",
INTEL_INFO(dev)->slice_total);
seq_printf(m, " Available Subslice Total: %u\n",
INTEL_INFO(dev)->subslice_total);
seq_printf(m, " Available Subslice Per Slice: %u\n",
INTEL_INFO(dev)->subslice_per_slice);
seq_printf(m, " Available EU Total: %u\n",
INTEL_INFO(dev)->eu_total);
seq_printf(m, " Available EU Per Subslice: %u\n",
INTEL_INFO(dev)->eu_per_subslice);
drm/i915:bxt: Enable Pooled EU support This mode allows to assign EUs to pools which can process work collectively. The command to enable this mode should be issued as part of context initialization. The pooled mode is global, once enabled it has to stay the same across all contexts until HW reset hence this is sent in auxiliary golden context batch. Thanks to Mika for the preliminary review and comments. v2: explain why this is enabled in golden context, use feature flag while enabling the support (Chris) v3: Include only kernel support as userspace support is not available yet. User space clients need to know when the pooled EU feature is present and enabled on the hardware so that they can adapt work submissions. Create a new device info flag for this purpose. Set has_pooled_eu to true in the Broxton static device info - Broxton supports the feature in hardware and the driver will enable it by default. We need to add getparam ioctls to enable userspace to query availability of this feature and to retrieve min. no of eus in a pool but we will expose them once userspace support is available. Opensource users for this feature are mesa, libva and beignet. Beignet team is currently working on adding userspace support. Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v2) Cc: Winiarski, Michal <michal.winiarski@intel.com> Cc: Zou, Nanhai <nanhai.zou@intel.com> Cc: Yang, Rong R <rong.r.yang@intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Armin Reese <armin.c.reese@intel.com> Cc: Tim Gore <tim.gore@intel.com> Signed-off-by: Jeff McGee <jeff.mcgee@intel.com> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> Reviewed-by: Michał Winiarski <michal.winiarski@intel.com> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
2016-06-03 12:34:33 +07:00
seq_printf(m, " Has Pooled EU: %s\n", yesno(HAS_POOLED_EU(dev)));
if (HAS_POOLED_EU(dev))
seq_printf(m, " Min EU in pool: %u\n",
INTEL_INFO(dev)->min_eu_in_pool);
seq_printf(m, " Has Slice Power Gating: %s\n",
yesno(INTEL_INFO(dev)->has_slice_pg));
seq_printf(m, " Has Subslice Power Gating: %s\n",
yesno(INTEL_INFO(dev)->has_subslice_pg));
seq_printf(m, " Has EU Power Gating: %s\n",
yesno(INTEL_INFO(dev)->has_eu_pg));
seq_puts(m, "SSEU Device Status\n");
memset(&stat, 0, sizeof(stat));
intel_runtime_pm_get(dev_priv);
if (IS_CHERRYVIEW(dev)) {
cherryview_sseu_device_status(dev, &stat);
} else if (IS_BROADWELL(dev)) {
broadwell_sseu_device_status(dev, &stat);
} else if (INTEL_INFO(dev)->gen >= 9) {
gen9_sseu_device_status(dev, &stat);
}
intel_runtime_pm_put(dev_priv);
seq_printf(m, " Enabled Slice Total: %u\n",
stat.slice_total);
seq_printf(m, " Enabled Subslice Total: %u\n",
stat.subslice_total);
seq_printf(m, " Enabled Subslice Per Slice: %u\n",
stat.subslice_per_slice);
seq_printf(m, " Enabled EU Total: %u\n",
stat.eu_total);
seq_printf(m, " Enabled EU Per Subslice: %u\n",
stat.eu_per_subslice);
return 0;
}
static int i915_forcewake_open(struct inode *inode, struct file *file)
{
struct drm_device *dev = inode->i_private;
struct drm_i915_private *dev_priv = to_i915(dev);
if (INTEL_INFO(dev)->gen < 6)
return 0;
intel_runtime_pm_get(dev_priv);
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
return 0;
}
static int i915_forcewake_release(struct inode *inode, struct file *file)
{
struct drm_device *dev = inode->i_private;
struct drm_i915_private *dev_priv = to_i915(dev);
if (INTEL_INFO(dev)->gen < 6)
return 0;
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
intel_runtime_pm_put(dev_priv);
return 0;
}
static const struct file_operations i915_forcewake_fops = {
.owner = THIS_MODULE,
.open = i915_forcewake_open,
.release = i915_forcewake_release,
};
static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor)
{
struct drm_device *dev = minor->dev;
struct dentry *ent;
ent = debugfs_create_file("i915_forcewake_user",
S_IRUSR,
root, dev,
&i915_forcewake_fops);
if (!ent)
return -ENOMEM;
return drm_add_fake_info_node(minor, ent, &i915_forcewake_fops);
}
static int i915_debugfs_create(struct dentry *root,
struct drm_minor *minor,
const char *name,
const struct file_operations *fops)
{
struct drm_device *dev = minor->dev;
struct dentry *ent;
ent = debugfs_create_file(name,
S_IRUGO | S_IWUSR,
root, dev,
fops);
if (!ent)
return -ENOMEM;
return drm_add_fake_info_node(minor, ent, fops);
}
static const struct drm_info_list i915_debugfs_list[] = {
{"i915_capabilities", i915_capabilities, 0},
{"i915_gem_objects", i915_gem_object_info, 0},
{"i915_gem_gtt", i915_gem_gtt_info, 0},
{"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST},
{"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST},
{"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST},
{"i915_gem_stolen", i915_gem_stolen_list_info },
{"i915_gem_pageflip", i915_gem_pageflip_info, 0},
{"i915_gem_request", i915_gem_request_info, 0},
{"i915_gem_seqno", i915_gem_seqno_info, 0},
{"i915_gem_fence_regs", i915_gem_fence_regs_info, 0},
{"i915_gem_interrupt", i915_interrupt_info, 0},
{"i915_gem_hws", i915_hws_info, 0, (void *)RCS},
{"i915_gem_hws_blt", i915_hws_info, 0, (void *)BCS},
{"i915_gem_hws_bsd", i915_hws_info, 0, (void *)VCS},
{"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS},
drm/i915: Implement a framework for batch buffer pools This adds a small module for managing a pool of batch buffers. The only current use case is for the command parser, as described in the kerneldoc in the patch. The code is simple, but separating it out makes it easier to change the underlying algorithms and to extend to future use cases should they arise. The interface is simple: init to create an empty pool, fini to clean it up, get to obtain a new buffer. Note that all buffers are expected to be inactive before cleaning up the pool. Locking is currently based on the caller holding the struct_mutex. We already do that in the places where we will use the batch pool for the command parser. v2: - s/BUG_ON/WARN_ON/ for locking assertions - Remove the cap on pool size - Switch from alloc/free to init/fini v3: - Idiomatic looping structure in _fini - Correct handling of purged objects - Don't return a buffer that's too much larger than needed v4: - Rebased to latest -nightly v5: - Remove _put() function and clean up comments to match v6: - Move purged check inside the loop (danvet, from v4 1/7 feedback) v7: - Use single list instead of two. (Chris W) - s/active_list/cache_list - Squashed in debug patches (Chris W) drm/i915: Add a batch pool debugfs file It provides some useful information about the buffers in the global command parser batch pool. v2: rebase on global pool instead of per-ring pools v3: rebase drm/i915: Add batch pool details to i915_gem_objects debugfs To better account for the potentially large memory consumption of the batch pool. v8: - Keep cache in LRU order (danvet, from v6 1/5 feedback) Issue: VIZ-4719 Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com> Reviewed-By: Jon Bloomfield <jon.bloomfield@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-12-12 03:13:08 +07:00
{"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
{"i915_guc_info", i915_guc_info, 0},
{"i915_guc_load_status", i915_guc_load_status_info, 0},
{"i915_guc_log_dump", i915_guc_log_dump, 0},
{"i915_frequency_info", i915_frequency_info, 0},
{"i915_hangcheck_info", i915_hangcheck_info, 0},
{"i915_drpc_info", i915_drpc_info, 0},
{"i915_emon_status", i915_emon_status, 0},
{"i915_ring_freq_table", i915_ring_freq_table, 0},
{"i915_frontbuffer_tracking", i915_frontbuffer_tracking, 0},
{"i915_fbc_status", i915_fbc_status, 0},
{"i915_ips_status", i915_ips_status, 0},
{"i915_sr_status", i915_sr_status, 0},
{"i915_opregion", i915_opregion, 0},
{"i915_vbt", i915_vbt, 0},
{"i915_gem_framebuffer", i915_gem_framebuffer_info, 0},
{"i915_context_status", i915_context_status, 0},
{"i915_dump_lrc", i915_dump_lrc, 0},
{"i915_execlists", i915_execlists, 0},
{"i915_forcewake_domains", i915_forcewake_domains, 0},
{"i915_swizzle_info", i915_swizzle_info, 0},
{"i915_ppgtt_info", i915_ppgtt_info, 0},
{"i915_llc", i915_llc, 0},
{"i915_edp_psr_status", i915_edp_psr_status, 0},
{"i915_sink_crc_eDP1", i915_sink_crc, 0},
{"i915_energy_uJ", i915_energy_uJ, 0},
{"i915_runtime_pm_status", i915_runtime_pm_status, 0},
{"i915_power_domain_info", i915_power_domain_info, 0},
{"i915_dmc_info", i915_dmc_info, 0},
{"i915_display_info", i915_display_info, 0},
{"i915_semaphore_status", i915_semaphore_status, 0},
{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
{"i915_dp_mst_info", i915_dp_mst_info, 0},
{"i915_wa_registers", i915_wa_registers, 0},
{"i915_ddb_info", i915_ddb_info, 0},
{"i915_sseu_status", i915_sseu_status, 0},
{"i915_drrs_status", i915_drrs_status, 0},
{"i915_rps_boost_info", i915_rps_boost_info, 0},
};
#define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
static const struct i915_debugfs_files {
const char *name;
const struct file_operations *fops;
} i915_debugfs_files[] = {
{"i915_wedged", &i915_wedged_fops},
{"i915_max_freq", &i915_max_freq_fops},
{"i915_min_freq", &i915_min_freq_fops},
{"i915_cache_sharing", &i915_cache_sharing_fops},
{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
{"i915_ring_test_irq", &i915_ring_test_irq_fops},
{"i915_gem_drop_caches", &i915_drop_caches_fops},
{"i915_error_state", &i915_error_state_fops},
{"i915_next_seqno", &i915_next_seqno_fops},
{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
{"i915_spr_wm_latency", &i915_spr_wm_latency_fops},
{"i915_cur_wm_latency", &i915_cur_wm_latency_fops},
{"i915_fbc_false_color", &i915_fbc_fc_fops},
{"i915_dp_test_data", &i915_displayport_test_data_fops},
{"i915_dp_test_type", &i915_displayport_test_type_fops},
{"i915_dp_test_active", &i915_displayport_test_active_fops}
};
void intel_display_crc_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
enum pipe pipe;
for_each_pipe(dev_priv, pipe) {
struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe];
pipe_crc->opened = false;
spin_lock_init(&pipe_crc->lock);
init_waitqueue_head(&pipe_crc->wq);
}
}
int i915_debugfs_register(struct drm_i915_private *dev_priv)
{
struct drm_minor *minor = dev_priv->drm.primary;
int ret, i;
ret = i915_forcewake_create(minor->debugfs_root, minor);
if (ret)
return ret;
for (i = 0; i < ARRAY_SIZE(i915_pipe_crc_data); i++) {
ret = i915_pipe_crc_create(minor->debugfs_root, minor, i);
if (ret)
return ret;
}
for (i = 0; i < ARRAY_SIZE(i915_debugfs_files); i++) {
ret = i915_debugfs_create(minor->debugfs_root, minor,
i915_debugfs_files[i].name,
i915_debugfs_files[i].fops);
if (ret)
return ret;
}
return drm_debugfs_create_files(i915_debugfs_list,
I915_DEBUGFS_ENTRIES,
minor->debugfs_root, minor);
}
void i915_debugfs_unregister(struct drm_i915_private *dev_priv)
{
struct drm_minor *minor = dev_priv->drm.primary;
int i;
drm_debugfs_remove_files(i915_debugfs_list,
I915_DEBUGFS_ENTRIES, minor);
drm_debugfs_remove_files((struct drm_info_list *) &i915_forcewake_fops,
1, minor);
for (i = 0; i < ARRAY_SIZE(i915_pipe_crc_data); i++) {
struct drm_info_list *info_list =
(struct drm_info_list *)&i915_pipe_crc_data[i];
drm_debugfs_remove_files(info_list, 1, minor);
}
for (i = 0; i < ARRAY_SIZE(i915_debugfs_files); i++) {
struct drm_info_list *info_list =
(struct drm_info_list *) i915_debugfs_files[i].fops;
drm_debugfs_remove_files(info_list, 1, minor);
}
}
struct dpcd_block {
/* DPCD dump start address. */
unsigned int offset;
/* DPCD dump end address, inclusive. If unset, .size will be used. */
unsigned int end;
/* DPCD dump size. Used if .end is unset. If unset, defaults to 1. */
size_t size;
/* Only valid for eDP. */
bool edp;
};
static const struct dpcd_block i915_dpcd_debug[] = {
{ .offset = DP_DPCD_REV, .size = DP_RECEIVER_CAP_SIZE },
{ .offset = DP_PSR_SUPPORT, .end = DP_PSR_CAPS },
{ .offset = DP_DOWNSTREAM_PORT_0, .size = 16 },
{ .offset = DP_LINK_BW_SET, .end = DP_EDP_CONFIGURATION_SET },
{ .offset = DP_SINK_COUNT, .end = DP_ADJUST_REQUEST_LANE2_3 },
{ .offset = DP_SET_POWER },
{ .offset = DP_EDP_DPCD_REV },
{ .offset = DP_EDP_GENERAL_CAP_1, .end = DP_EDP_GENERAL_CAP_3 },
{ .offset = DP_EDP_DISPLAY_CONTROL_REGISTER, .end = DP_EDP_BACKLIGHT_FREQ_CAP_MAX_LSB },
{ .offset = DP_EDP_DBC_MINIMUM_BRIGHTNESS_SET, .end = DP_EDP_DBC_MAXIMUM_BRIGHTNESS_SET },
};
static int i915_dpcd_show(struct seq_file *m, void *data)
{
struct drm_connector *connector = m->private;
struct intel_dp *intel_dp =
enc_to_intel_dp(&intel_attached_encoder(connector)->base);
uint8_t buf[16];
ssize_t err;
int i;
if (connector->status != connector_status_connected)
return -ENODEV;
for (i = 0; i < ARRAY_SIZE(i915_dpcd_debug); i++) {
const struct dpcd_block *b = &i915_dpcd_debug[i];
size_t size = b->end ? b->end - b->offset + 1 : (b->size ?: 1);
if (b->edp &&
connector->connector_type != DRM_MODE_CONNECTOR_eDP)
continue;
/* low tech for now */
if (WARN_ON(size > sizeof(buf)))
continue;
err = drm_dp_dpcd_read(&intel_dp->aux, b->offset, buf, size);
if (err <= 0) {
DRM_ERROR("dpcd read (%zu bytes at %u) failed (%zd)\n",
size, b->offset, err);
continue;
}
seq_printf(m, "%04x: %*ph\n", b->offset, (int) size, buf);
}
return 0;
}
static int i915_dpcd_open(struct inode *inode, struct file *file)
{
return single_open(file, i915_dpcd_show, inode->i_private);
}
static const struct file_operations i915_dpcd_fops = {
.owner = THIS_MODULE,
.open = i915_dpcd_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
/**
* i915_debugfs_connector_add - add i915 specific connector debugfs files
* @connector: pointer to a registered drm_connector
*
* Cleanup will be done by drm_connector_unregister() through a call to
* drm_debugfs_connector_remove().
*
* Returns 0 on success, negative error codes on error.
*/
int i915_debugfs_connector_add(struct drm_connector *connector)
{
struct dentry *root = connector->debugfs_entry;
/* The connector must have been registered beforehands. */
if (!root)
return -ENODEV;
if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
connector->connector_type == DRM_MODE_CONNECTOR_eDP)
debugfs_create_file("i915_dpcd", S_IRUGO, root, connector,
&i915_dpcd_fops);
return 0;
}