linux_dsm_epyc7002/drivers/gpu/drm/i915/i915_gem_execbuffer.c

1287 lines
34 KiB
C
Raw Normal View History

/*
* Copyright © 2008,2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/dma_remapping.h>
struct eb_objects {
struct list_head objects;
int and;
union {
struct drm_i915_gem_object *lut[0];
struct hlist_head buckets[0];
};
};
static struct eb_objects *
eb_create(struct drm_i915_gem_execbuffer2 *args)
{
struct eb_objects *eb = NULL;
if (args->flags & I915_EXEC_HANDLE_LUT) {
int size = args->buffer_count;
size *= sizeof(struct drm_i915_gem_object *);
size += sizeof(struct eb_objects);
eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
}
if (eb == NULL) {
int size = args->buffer_count;
int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
while (count > 2*size)
count >>= 1;
eb = kzalloc(count*sizeof(struct hlist_head) +
sizeof(struct eb_objects),
GFP_TEMPORARY);
if (eb == NULL)
return eb;
eb->and = count - 1;
} else
eb->and = -args->buffer_count;
INIT_LIST_HEAD(&eb->objects);
return eb;
}
static void
eb_reset(struct eb_objects *eb)
{
if (eb->and >= 0)
memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}
static int
eb_lookup_objects(struct eb_objects *eb,
struct drm_i915_gem_exec_object2 *exec,
const struct drm_i915_gem_execbuffer2 *args,
struct drm_file *file)
{
int i;
spin_lock(&file->table_lock);
for (i = 0; i < args->buffer_count; i++) {
struct drm_i915_gem_object *obj;
obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
if (obj == NULL) {
spin_unlock(&file->table_lock);
DRM_DEBUG("Invalid object handle %d at index %d\n",
exec[i].handle, i);
return -ENOENT;
}
if (!list_empty(&obj->exec_list)) {
spin_unlock(&file->table_lock);
DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
obj, exec[i].handle, i);
return -EINVAL;
}
drm_gem_object_reference(&obj->base);
list_add_tail(&obj->exec_list, &eb->objects);
obj->exec_entry = &exec[i];
if (eb->and < 0) {
eb->lut[i] = obj;
} else {
uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
obj->exec_handle = handle;
hlist_add_head(&obj->exec_node,
&eb->buckets[handle & eb->and]);
}
}
spin_unlock(&file->table_lock);
return 0;
}
static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
if (eb->and < 0) {
if (handle >= -eb->and)
return NULL;
return eb->lut[handle];
} else {
struct hlist_head *head;
struct hlist_node *node;
head = &eb->buckets[handle & eb->and];
hlist_for_each(node, head) {
struct drm_i915_gem_object *obj;
obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
if (obj->exec_handle == handle)
return obj;
}
return NULL;
}
}
static void
eb_destroy(struct eb_objects *eb)
{
while (!list_empty(&eb->objects)) {
struct drm_i915_gem_object *obj;
obj = list_first_entry(&eb->objects,
struct drm_i915_gem_object,
exec_list);
list_del_init(&obj->exec_list);
drm_gem_object_unreference(&obj->base);
}
kfree(eb);
}
static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
!obj->map_and_fenceable ||
obj->cache_level != I915_CACHE_NONE);
}
static int
relocate_entry_cpu(struct drm_i915_gem_object *obj,
struct drm_i915_gem_relocation_entry *reloc)
{
uint32_t page_offset = offset_in_page(reloc->offset);
char *vaddr;
int ret = -EINVAL;
ret = i915_gem_object_set_to_cpu_domain(obj, 1);
if (ret)
return ret;
vaddr = kmap_atomic(i915_gem_object_get_page(obj,
reloc->offset >> PAGE_SHIFT));
*(uint32_t *)(vaddr + page_offset) = reloc->delta;
kunmap_atomic(vaddr);
return 0;
}
static int
relocate_entry_gtt(struct drm_i915_gem_object *obj,
struct drm_i915_gem_relocation_entry *reloc)
{
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t __iomem *reloc_entry;
void __iomem *reloc_page;
int ret = -EINVAL;
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
return ret;
ret = i915_gem_object_put_fence(obj);
if (ret)
return ret;
/* Map the page containing the relocation we're going to perform. */
reloc->offset += i915_gem_obj_ggtt_offset(obj);
reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
reloc->offset & PAGE_MASK);
reloc_entry = (uint32_t __iomem *)
(reloc_page + offset_in_page(reloc->offset));
iowrite32(reloc->delta, reloc_entry);
io_mapping_unmap_atomic(reloc_page);
return 0;
}
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
struct eb_objects *eb,
struct drm_i915_gem_relocation_entry *reloc,
struct i915_address_space *vm)
{
struct drm_device *dev = obj->base.dev;
struct drm_gem_object *target_obj;
struct drm_i915_gem_object *target_i915_obj;
uint32_t target_offset;
int ret = -EINVAL;
/* we've already hold a reference to all valid objects */
target_obj = &eb_get_object(eb, reloc->target_handle)->base;
if (unlikely(target_obj == NULL))
return -ENOENT;
target_i915_obj = to_intel_bo(target_obj);
target_offset = i915_gem_obj_ggtt_offset(target_i915_obj);
/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
* pipe_control writes because the gpu doesn't properly redirect them
* through the ppgtt for non_secure batchbuffers. */
if (unlikely(IS_GEN6(dev) &&
reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
!target_i915_obj->has_global_gtt_mapping)) {
i915_gem_gtt_bind_object(target_i915_obj,
target_i915_obj->cache_level);
}
/* Validate that the target is in a valid r/w GPU domain */
if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
DRM_DEBUG("reloc with multiple write domains: "
"obj %p target %d offset %d "
"read %08x write %08x",
obj, reloc->target_handle,
(int) reloc->offset,
reloc->read_domains,
reloc->write_domain);
return ret;
}
if (unlikely((reloc->write_domain | reloc->read_domains)
& ~I915_GEM_GPU_DOMAINS)) {
DRM_DEBUG("reloc with read/write non-GPU domains: "
"obj %p target %d offset %d "
"read %08x write %08x",
obj, reloc->target_handle,
(int) reloc->offset,
reloc->read_domains,
reloc->write_domain);
return ret;
}
target_obj->pending_read_domains |= reloc->read_domains;
target_obj->pending_write_domain |= reloc->write_domain;
/* If the relocation already has the right value in it, no
* more work needs to be done.
*/
if (target_offset == reloc->presumed_offset)
return 0;
/* Check that the relocation address is valid... */
if (unlikely(reloc->offset > obj->base.size - 4)) {
DRM_DEBUG("Relocation beyond object bounds: "
"obj %p target %d offset %d size %d.\n",
obj, reloc->target_handle,
(int) reloc->offset,
(int) obj->base.size);
return ret;
}
if (unlikely(reloc->offset & 3)) {
DRM_DEBUG("Relocation not 4-byte aligned: "
"obj %p target %d offset %d.\n",
obj, reloc->target_handle,
(int) reloc->offset);
return ret;
}
/* We can't wait for rendering with pagefaults disabled */
if (obj->active && in_atomic())
return -EFAULT;
reloc->delta += target_offset;
if (use_cpu_reloc(obj))
ret = relocate_entry_cpu(obj, reloc);
else
ret = relocate_entry_gtt(obj, reloc);
if (ret)
return ret;
/* and update the user's relocation entry */
reloc->presumed_offset = target_offset;
return 0;
}
static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
struct eb_objects *eb,
struct i915_address_space *vm)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
struct drm_i915_gem_relocation_entry __user *user_relocs;
struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
int remain, ret;
user_relocs = to_user_ptr(entry->relocs_ptr);
remain = entry->relocation_count;
while (remain) {
struct drm_i915_gem_relocation_entry *r = stack_reloc;
int count = remain;
if (count > ARRAY_SIZE(stack_reloc))
count = ARRAY_SIZE(stack_reloc);
remain -= count;
if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
return -EFAULT;
do {
u64 offset = r->presumed_offset;
ret = i915_gem_execbuffer_relocate_entry(obj, eb, r,
vm);
if (ret)
return ret;
if (r->presumed_offset != offset &&
__copy_to_user_inatomic(&user_relocs->presumed_offset,
&r->presumed_offset,
sizeof(r->presumed_offset))) {
return -EFAULT;
}
user_relocs++;
r++;
} while (--count);
}
return 0;
#undef N_RELOC
}
static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
struct eb_objects *eb,
struct drm_i915_gem_relocation_entry *relocs,
struct i915_address_space *vm)
{
const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
int i, ret;
for (i = 0; i < entry->relocation_count; i++) {
ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i],
vm);
if (ret)
return ret;
}
return 0;
}
static int
i915_gem_execbuffer_relocate(struct eb_objects *eb,
struct i915_address_space *vm)
{
struct drm_i915_gem_object *obj;
int ret = 0;
/* This is the fast path and we cannot handle a pagefault whilst
* holding the struct mutex lest the user pass in the relocations
* contained within a mmaped bo. For in such a case we, the page
* fault handler would call i915_gem_fault() and we would try to
* acquire the struct mutex again. Obviously this is bad and so
* lockdep complains vehemently.
*/
pagefault_disable();
list_for_each_entry(obj, &eb->objects, exec_list) {
ret = i915_gem_execbuffer_relocate_object(obj, eb, vm);
if (ret)
break;
}
pagefault_enable();
return ret;
}
#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
static int
need_reloc_mappable(struct drm_i915_gem_object *obj)
{
struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
return entry->relocation_count && !use_cpu_reloc(obj);
}
static int
i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
struct intel_ring_buffer *ring,
struct i915_address_space *vm,
bool *need_reloc)
{
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
bool need_fence, need_mappable;
int ret;
need_fence =
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(obj);
ret = i915_gem_object_pin(obj, vm, entry->alignment, need_mappable,
false);
if (ret)
return ret;
entry->flags |= __EXEC_OBJECT_HAS_PIN;
if (has_fenced_gpu_access) {
if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
ret = i915_gem_object_get_fence(obj);
if (ret)
return ret;
if (i915_gem_object_pin_fence(obj))
entry->flags |= __EXEC_OBJECT_HAS_FENCE;
obj->pending_fenced_gpu_access = true;
}
}
/* Ensure ppgtt mapping exists if needed */
if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
obj, obj->cache_level);
obj->has_aliasing_ppgtt_mapping = 1;
}
if (entry->offset != i915_gem_obj_offset(obj, vm)) {
entry->offset = i915_gem_obj_offset(obj, vm);
*need_reloc = true;
}
if (entry->flags & EXEC_OBJECT_WRITE) {
obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
}
if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
!obj->has_global_gtt_mapping)
i915_gem_gtt_bind_object(obj, obj->cache_level);
return 0;
}
static void
i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
{
struct drm_i915_gem_exec_object2 *entry;
if (!i915_gem_obj_bound_any(obj))
return;
entry = obj->exec_entry;
if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
i915_gem_object_unpin_fence(obj);
if (entry->flags & __EXEC_OBJECT_HAS_PIN)
i915_gem_object_unpin(obj);
entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
struct list_head *objects,
struct i915_address_space *vm,
bool *need_relocs)
{
struct drm_i915_gem_object *obj;
struct list_head ordered_objects;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
int retry;
INIT_LIST_HEAD(&ordered_objects);
while (!list_empty(objects)) {
struct drm_i915_gem_exec_object2 *entry;
bool need_fence, need_mappable;
obj = list_first_entry(objects,
struct drm_i915_gem_object,
exec_list);
entry = obj->exec_entry;
need_fence =
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(obj);
if (need_mappable)
list_move(&obj->exec_list, &ordered_objects);
else
list_move_tail(&obj->exec_list, &ordered_objects);
obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
obj->base.pending_write_domain = 0;
obj->pending_fenced_gpu_access = false;
}
list_splice(&ordered_objects, objects);
/* Attempt to pin all of the buffers into the GTT.
* This is done in 3 phases:
*
* 1a. Unbind all objects that do not match the GTT constraints for
* the execbuffer (fenceable, mappable, alignment etc).
* 1b. Increment pin count for already bound objects.
* 2. Bind new objects.
* 3. Decrement pin count.
*
* This avoid unnecessary unbinding of later objects in order to make
* room for the earlier objects *unless* we need to defragment.
*/
retry = 0;
do {
int ret = 0;
/* Unbind any ill-fitting objects or pin. */
list_for_each_entry(obj, objects, exec_list) {
struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
bool need_fence, need_mappable;
u32 obj_offset;
if (!i915_gem_obj_bound(obj, vm))
continue;
obj_offset = i915_gem_obj_offset(obj, vm);
need_fence =
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(obj);
WARN_ON((need_mappable || need_fence) &&
!i915_is_ggtt(vm));
if ((entry->alignment &&
obj_offset & (entry->alignment - 1)) ||
(need_mappable && !obj->map_and_fenceable))
drm/i915: plumb VM into bind/unbind code As alluded to in several patches, and it will be reiterated later... A VMA is an abstraction for a GEM BO bound into an address space. Therefore it stands to reason, that the existing bind, and unbind are the ones which will be the most impacted. This patch implements this, and updates all callers which weren't already updated in the series (because it was too messy). This patch represents the bulk of an earlier, larger patch. I've pulled out a bunch of things by the request of Daniel. The history is preserved for posterity with the email convention of ">" One big change from the original patch aside from a bunch of cropping is I've created an i915_vma_unbind() function. That is because we always have the VMA anyway, and doing an extra lookup is useful. There is a caveat, we retain an i915_gem_object_ggtt_unbind, for the global cases which might not talk in VMAs. > drm/i915: plumb VM into object operations > > This patch was formerly known as: > "drm/i915: Create VMAs (part 3) - plumbing" > > This patch adds a VM argument, bind/unbind, and the object > offset/size/color getters/setters. It preserves the old ggtt helper > functions because things still need, and will continue to need them. > > Some code will still need to be ported over after this. > > v2: Fix purge to pick an object and unbind all vmas > This was doable because of the global bound list change. > > v3: With the commit to actually pin/unpin pages in place, there is no > longer a need to check if unbind succeeded before calling put_pages(). > Make put_pages only BUG() after checking pin count. > > v4: Rebased on top of the new hangcheck work by Mika > plumbed eb_destroy also > Many checkpatch related fixes > > v5: Very large rebase > > v6: > Change BUG_ON to WARN_ON (Daniel) > Rename vm to ggtt in preallocate stolen, since it is always ggtt when > dealing with stolen memory. (Daniel) > list_for_each will short-circuit already (Daniel) > remove superflous space (Daniel) > Use per object list of vmas (Daniel) > Make obj_bound_any() use obj_bound for each vm (Ben) > s/bind_to_gtt/bind_to_vm/ (Ben) > > Fixed up the inactive shrinker. As Daniel noticed the code could > potentially count the same object multiple times. While it's not > possible in the current case, since 1 object can only ever be bound into > 1 address space thus far - we may as well try to get something more > future proof in place now. With a prep patch before this to switch over > to using the bound list + inactive check, we're now able to carry that > forward for every address space an object is bound into. Signed-off-by: Ben Widawsky <ben@bwidawsk.net> [danvet: Rebase on top of the loss of "drm/i915: Cleanup more of VMA in destroy".] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-08-01 07:00:10 +07:00
ret = i915_vma_unbind(i915_gem_obj_to_vma(obj, vm));
else
ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs);
if (ret)
goto err;
}
/* Bind fresh objects */
list_for_each_entry(obj, objects, exec_list) {
if (i915_gem_obj_bound(obj, vm))
continue;
ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs);
if (ret)
goto err;
}
err: /* Decrement pin count for bound objects */
list_for_each_entry(obj, objects, exec_list)
i915_gem_execbuffer_unreserve_object(obj);
drm/i915: Track unbound pages When dealing with a working set larger than the GATT, or even the mappable aperture when touching through the GTT, we end up with evicting objects only to rebind them at a new offset again later. Moving an object into and out of the GTT requires clflushing the pages, thus causing a double-clflush penalty for rebinding. To avoid having to clflush on rebinding, we can track the pages as they are evicted from the GTT and only relinquish those pages on memory pressure. As usual, if it were not for the handling of out-of-memory condition and having to manually shrink our own bo caches, it would be a net reduction of code. Alas. Note: The patch also contains a few changes to the last-hope evict_everything logic in i916_gem_execbuffer.c - we no longer try to only evict the purgeable stuff in a first try (since that's superflous and only helps in OOM corner-cases, not fragmented-gtt trashing situations). Also, the extraction of the get_pages retry loop from bind_to_gtt (and other callsites) to get_pages should imo have been a separate patch. v2: Ditch the newly added put_pages (for unbound objects only) in i915_gem_reset. A quick irc discussion hasn't revealed any important reason for this, so if we need this, I'd like to have a git blame'able explanation for it. v3: Undo the s/drm_malloc_ab/kmalloc/ in get_pages that Chris noticed. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Split out code movements and rant a bit in the commit message with a few Notes. Done v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-08-20 16:40:46 +07:00
if (ret != -ENOSPC || retry++)
return ret;
drm/i915: Track unbound pages When dealing with a working set larger than the GATT, or even the mappable aperture when touching through the GTT, we end up with evicting objects only to rebind them at a new offset again later. Moving an object into and out of the GTT requires clflushing the pages, thus causing a double-clflush penalty for rebinding. To avoid having to clflush on rebinding, we can track the pages as they are evicted from the GTT and only relinquish those pages on memory pressure. As usual, if it were not for the handling of out-of-memory condition and having to manually shrink our own bo caches, it would be a net reduction of code. Alas. Note: The patch also contains a few changes to the last-hope evict_everything logic in i916_gem_execbuffer.c - we no longer try to only evict the purgeable stuff in a first try (since that's superflous and only helps in OOM corner-cases, not fragmented-gtt trashing situations). Also, the extraction of the get_pages retry loop from bind_to_gtt (and other callsites) to get_pages should imo have been a separate patch. v2: Ditch the newly added put_pages (for unbound objects only) in i915_gem_reset. A quick irc discussion hasn't revealed any important reason for this, so if we need this, I'd like to have a git blame'able explanation for it. v3: Undo the s/drm_malloc_ab/kmalloc/ in get_pages that Chris noticed. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Split out code movements and rant a bit in the commit message with a few Notes. Done v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-08-20 16:40:46 +07:00
ret = i915_gem_evict_everything(ring->dev);
if (ret)
return ret;
} while (1);
}
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
struct drm_i915_gem_execbuffer2 *args,
struct drm_file *file,
struct intel_ring_buffer *ring,
struct eb_objects *eb,
struct drm_i915_gem_exec_object2 *exec,
struct i915_address_space *vm)
{
struct drm_i915_gem_relocation_entry *reloc;
struct drm_i915_gem_object *obj;
bool need_relocs;
int *reloc_offset;
int i, total, ret;
int count = args->buffer_count;
/* We may process another execbuffer during the unlock... */
while (!list_empty(&eb->objects)) {
obj = list_first_entry(&eb->objects,
struct drm_i915_gem_object,
exec_list);
list_del_init(&obj->exec_list);
drm_gem_object_unreference(&obj->base);
}
mutex_unlock(&dev->struct_mutex);
total = 0;
for (i = 0; i < count; i++)
total += exec[i].relocation_count;
reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
reloc = drm_malloc_ab(total, sizeof(*reloc));
if (reloc == NULL || reloc_offset == NULL) {
drm_free_large(reloc);
drm_free_large(reloc_offset);
mutex_lock(&dev->struct_mutex);
return -ENOMEM;
}
total = 0;
for (i = 0; i < count; i++) {
struct drm_i915_gem_relocation_entry __user *user_relocs;
u64 invalid_offset = (u64)-1;
int j;
user_relocs = to_user_ptr(exec[i].relocs_ptr);
if (copy_from_user(reloc+total, user_relocs,
exec[i].relocation_count * sizeof(*reloc))) {
ret = -EFAULT;
mutex_lock(&dev->struct_mutex);
goto err;
}
/* As we do not update the known relocation offsets after
* relocating (due to the complexities in lock handling),
* we need to mark them as invalid now so that we force the
* relocation processing next time. Just in case the target
* object is evicted and then rebound into its old
* presumed_offset before the next execbuffer - if that
* happened we would make the mistake of assuming that the
* relocations were valid.
*/
for (j = 0; j < exec[i].relocation_count; j++) {
if (copy_to_user(&user_relocs[j].presumed_offset,
&invalid_offset,
sizeof(invalid_offset))) {
ret = -EFAULT;
mutex_lock(&dev->struct_mutex);
goto err;
}
}
reloc_offset[i] = total;
total += exec[i].relocation_count;
}
ret = i915_mutex_lock_interruptible(dev);
if (ret) {
mutex_lock(&dev->struct_mutex);
goto err;
}
/* reacquire the objects */
eb_reset(eb);
ret = eb_lookup_objects(eb, exec, args, file);
if (ret)
goto err;
need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
ret = i915_gem_execbuffer_reserve(ring, &eb->objects, vm, &need_relocs);
if (ret)
goto err;
list_for_each_entry(obj, &eb->objects, exec_list) {
int offset = obj->exec_entry - exec;
ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
reloc + reloc_offset[offset],
vm);
if (ret)
goto err;
}
/* Leave the user relocations as are, this is the painfully slow path,
* and we want to avoid the complication of dropping the lock whilst
* having buffers reserved in the aperture and so causing spurious
* ENOSPC for random operations.
*/
err:
drm_free_large(reloc);
drm_free_large(reloc_offset);
return ret;
}
static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
struct list_head *objects)
{
struct drm_i915_gem_object *obj;
uint32_t flush_domains = 0;
bool flush_chipset = false;
int ret;
list_for_each_entry(obj, objects, exec_list) {
ret = i915_gem_object_sync(obj, ring);
if (ret)
return ret;
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
flush_chipset |= i915_gem_clflush_object(obj, false);
flush_domains |= obj->base.write_domain;
}
if (flush_chipset)
drm/i915: Stop using AGP layer for GEN6+ As a quick hack we make the old intel_gtt structure mutable so we can fool a bunch of the existing code which depends on elements in that data structure. We can/should try to remove this in a subsequent patch. This should preserve the old gtt init behavior which upon writing these patches seems incorrect. The next patch will fix these things. The one exception is VLV which doesn't have the preserved flush control write behavior. Since we want to do that for all GEN6+ stuff, we'll handle that in a later patch. Mainstream VLV support doesn't actually exist yet anyway. v2: Update the comment to remove the "voodoo" Check that the last pte written matches what we readback v3: actually kill cache_level_to_agp_type since most of the flags will disappear in an upcoming patch v4: v3 was actually not what we wanted (Daniel) Make the ggtt bind assertions better and stricter (Chris) Fix some uncaught errors at gtt init (Chris) Some other random stuff that Chris wanted v5: check for i==0 in gen6_ggtt_bind_object to shut up gcc (Ben) Signed-off-by: Ben Widawsky <ben@bwidawsk.net> Reviewed-by [v4]: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Make the cache_level -> agp_flags conversion for pre-gen6 a tad more robust by mapping everything != CACHE_NONE to the cached agp flag - we have a 1:1 uncached mapping, but different modes of cacheable (at least on later generations). Suggested by Chris Wilson.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-11-05 00:21:27 +07:00
i915_gem_chipset_flush(ring->dev);
if (flush_domains & I915_GEM_DOMAIN_GTT)
wmb();
drm/i915: Insert a flush between batches if the breadcrumb was dropped If we drop the breadcrumb request after a batch due to a signal for example we aim to fix it up at the next opportunity. In this case we emit a second batchbuffer with no waits upon the first and so no opportunity to insert the missing request, so we need to emit the missing flush for coherency. (Note that that invalidating the render cache is the same as flushing it, so there should have been no observable corruption.) Note that beside simply adding the missing flush, avoiding potential render corruption, this will also fix at least parts of the problem introduced by some funny interaction of these two commits: commit de2b998552c1534e87bfbc51ec5734b02bc89020 Author: Daniel Vetter <daniel.vetter@ffwll.ch> Date: Wed Jul 4 22:52:50 2012 +0200 drm/i915: don't return a spurious -EIO from intel_ring_begin which allowed intel_ring_begin to return -ERESTARTSYS and commit cc889e0f6ce6a63c62db17d702ecfed86d58083f Author: Daniel Vetter <daniel.vetter@ffwll.ch> Date: Wed Jun 13 20:45:19 2012 +0200 drm/i915: disable flushing_list/gpu_write_list which essentially disabled the flushing list. The issue happens when we submit a batch & emit it, but get interrupted (thanks to the first patch) while trying to emit the flush. On the next batch we still assume that the full gpu domain handling is in effect and hence compute the invalidate&flushing domains. But thanks to the 2nd patch we totally ignore these and only invalidate all gpu domains, presuming that any required flushes have been issued already. Which is wrong and eventually results in us updating the new write_domain values with the computed pending_write_domain values, which leaves an object with write_domain == 0 on the gpu_write_list. As soon as we try to unbind that object, things blow up. Fix this by emitting the missing flush according to the new ring->gpu_caches_dirty flag. Note that this does _not_ fix all the current cases where we end up with an object on the flushing_list that can't be flushed. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=52040 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Add bug explanation to commit message.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-07-13 20:14:08 +07:00
/* Unconditionally invalidate gpu caches and ensure that we do flush
* any residual writes from the previous batch.
*/
return intel_ring_invalidate_all_caches(ring);
}
static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
return false;
return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}
static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
int count)
{
int i;
int relocs_total = 0;
int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
for (i = 0; i < count; i++) {
char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
int length; /* limited by fault_in_pages_readable() */
if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
return -EINVAL;
/* First check for malicious input causing overflow in
* the worst case where we need to allocate the entire
* relocation tree as a single array.
*/
if (exec[i].relocation_count > relocs_max - relocs_total)
return -EINVAL;
relocs_total += exec[i].relocation_count;
length = exec[i].relocation_count *
sizeof(struct drm_i915_gem_relocation_entry);
/*
* We must check that the entire relocation array is safe
* to read, but since we may need to update the presumed
* offsets during execution, check for full write access.
*/
if (!access_ok(VERIFY_WRITE, ptr, length))
return -EFAULT;
if (likely(!i915_prefault_disable)) {
if (fault_in_multipages_readable(ptr, length))
return -EFAULT;
}
}
return 0;
}
static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
struct i915_address_space *vm,
drm/i915: Preallocate next seqno before touching the ring Based on the work by Mika Kuoppala, we realised that we need to handle seqno wraparound prior to committing our changes to the ring. The most obvious point then is to grab the seqno inside intel_ring_begin(), and then to reuse that seqno for all ring operations until the next request. As intel_ring_begin() can fail, the callers must already be prepared to handle such failure and so we can safely add further checks. This patch looks like it should be split up into the interface changes and the tweaks to move seqno wrapping from the execbuffer into the core seqno increment. However, I found no easy way to break it into incremental steps without introducing further broken behaviour. v2: Mika found a silly mistake and a subtle error in the existing code; inside i915_gem_retire_requests() we were resetting the sync_seqno of the target ring based on the seqno from this ring - which are only related by the order of their allocation, not retirement. Hence we were applying the optimisation that the rings were synchronised too early, fortunately the only real casualty there is the handling of seqno wrapping. v3: Do not forget to reset the sync_seqno upon module reinitialisation, ala resume. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=863861 Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> [v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-11-27 23:22:52 +07:00
struct intel_ring_buffer *ring)
{
struct drm_i915_gem_object *obj;
list_for_each_entry(obj, objects, exec_list) {
u32 old_read = obj->base.read_domains;
u32 old_write = obj->base.write_domain;
obj->base.write_domain = obj->base.pending_write_domain;
if (obj->base.write_domain == 0)
obj->base.pending_read_domains |= obj->base.read_domains;
obj->base.read_domains = obj->base.pending_read_domains;
obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
2013-08-01 07:00:14 +07:00
/* FIXME: This lookup gets fixed later <-- danvet */
list_move_tail(&i915_gem_obj_to_vma(obj, vm)->mm_list, &vm->active_list);
drm/i915: Preallocate next seqno before touching the ring Based on the work by Mika Kuoppala, we realised that we need to handle seqno wraparound prior to committing our changes to the ring. The most obvious point then is to grab the seqno inside intel_ring_begin(), and then to reuse that seqno for all ring operations until the next request. As intel_ring_begin() can fail, the callers must already be prepared to handle such failure and so we can safely add further checks. This patch looks like it should be split up into the interface changes and the tweaks to move seqno wrapping from the execbuffer into the core seqno increment. However, I found no easy way to break it into incremental steps without introducing further broken behaviour. v2: Mika found a silly mistake and a subtle error in the existing code; inside i915_gem_retire_requests() we were resetting the sync_seqno of the target ring based on the seqno from this ring - which are only related by the order of their allocation, not retirement. Hence we were applying the optimisation that the rings were synchronised too early, fortunately the only real casualty there is the handling of seqno wrapping. v3: Do not forget to reset the sync_seqno upon module reinitialisation, ala resume. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=863861 Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> [v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-11-27 23:22:52 +07:00
i915_gem_object_move_to_active(obj, ring);
if (obj->base.write_domain) {
obj->dirty = 1;
drm/i915: Preallocate next seqno before touching the ring Based on the work by Mika Kuoppala, we realised that we need to handle seqno wraparound prior to committing our changes to the ring. The most obvious point then is to grab the seqno inside intel_ring_begin(), and then to reuse that seqno for all ring operations until the next request. As intel_ring_begin() can fail, the callers must already be prepared to handle such failure and so we can safely add further checks. This patch looks like it should be split up into the interface changes and the tweaks to move seqno wrapping from the execbuffer into the core seqno increment. However, I found no easy way to break it into incremental steps without introducing further broken behaviour. v2: Mika found a silly mistake and a subtle error in the existing code; inside i915_gem_retire_requests() we were resetting the sync_seqno of the target ring based on the seqno from this ring - which are only related by the order of their allocation, not retirement. Hence we were applying the optimisation that the rings were synchronised too early, fortunately the only real casualty there is the handling of seqno wrapping. v3: Do not forget to reset the sync_seqno upon module reinitialisation, ala resume. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=863861 Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> [v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-11-27 23:22:52 +07:00
obj->last_write_seqno = intel_ring_get_seqno(ring);
if (obj->pin_count) /* check for potential scanout */
intel_mark_fb_busy(obj, ring);
}
trace_i915_gem_object_change_domain(obj, old_read, old_write);
}
}
static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
struct drm_file *file,
struct intel_ring_buffer *ring,
struct drm_i915_gem_object *obj)
{
2012-06-14 01:45:19 +07:00
/* Unconditionally force add_request to emit a full flush. */
ring->gpu_caches_dirty = true;
/* Add a breadcrumb for the completion of the batch buffer */
(void)__i915_add_request(ring, file, obj, NULL);
}
static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
struct intel_ring_buffer *ring)
{
drm_i915_private_t *dev_priv = dev->dev_private;
int ret, i;
if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
return 0;
ret = intel_ring_begin(ring, 4 * 3);
if (ret)
return ret;
for (i = 0; i < 4; i++) {
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
intel_ring_emit(ring, 0);
}
intel_ring_advance(ring);
return 0;
}
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file,
struct drm_i915_gem_execbuffer2 *args,
struct drm_i915_gem_exec_object2 *exec,
struct i915_address_space *vm)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct eb_objects *eb;
struct drm_i915_gem_object *batch_obj;
struct drm_clip_rect *cliprects = NULL;
struct intel_ring_buffer *ring;
u32 ctx_id = i915_execbuffer2_get_context_id(*args);
u32 exec_start, exec_len;
u32 mask, flags;
int ret, mode, i;
bool need_relocs;
if (!i915_gem_check_execbuffer(args))
return -EINVAL;
ret = validate_exec_list(exec, args->buffer_count);
if (ret)
return ret;
flags = 0;
if (args->flags & I915_EXEC_SECURE) {
if (!file->is_master || !capable(CAP_SYS_ADMIN))
return -EPERM;
flags |= I915_DISPATCH_SECURE;
}
if (args->flags & I915_EXEC_IS_PINNED)
flags |= I915_DISPATCH_PINNED;
switch (args->flags & I915_EXEC_RING_MASK) {
case I915_EXEC_DEFAULT:
case I915_EXEC_RENDER:
ring = &dev_priv->ring[RCS];
break;
case I915_EXEC_BSD:
ring = &dev_priv->ring[VCS];
if (ctx_id != DEFAULT_CONTEXT_ID) {
DRM_DEBUG("Ring %s doesn't support contexts\n",
ring->name);
return -EPERM;
}
break;
case I915_EXEC_BLT:
ring = &dev_priv->ring[BCS];
if (ctx_id != DEFAULT_CONTEXT_ID) {
DRM_DEBUG("Ring %s doesn't support contexts\n",
ring->name);
return -EPERM;
}
break;
case I915_EXEC_VEBOX:
ring = &dev_priv->ring[VECS];
if (ctx_id != DEFAULT_CONTEXT_ID) {
DRM_DEBUG("Ring %s doesn't support contexts\n",
ring->name);
return -EPERM;
}
break;
default:
DRM_DEBUG("execbuf with unknown ring: %d\n",
(int)(args->flags & I915_EXEC_RING_MASK));
return -EINVAL;
}
if (!intel_ring_initialized(ring)) {
DRM_DEBUG("execbuf with invalid ring: %d\n",
(int)(args->flags & I915_EXEC_RING_MASK));
return -EINVAL;
}
mode = args->flags & I915_EXEC_CONSTANTS_MASK;
mask = I915_EXEC_CONSTANTS_MASK;
switch (mode) {
case I915_EXEC_CONSTANTS_REL_GENERAL:
case I915_EXEC_CONSTANTS_ABSOLUTE:
case I915_EXEC_CONSTANTS_REL_SURFACE:
if (ring == &dev_priv->ring[RCS] &&
mode != dev_priv->relative_constants_mode) {
if (INTEL_INFO(dev)->gen < 4)
return -EINVAL;
if (INTEL_INFO(dev)->gen > 5 &&
mode == I915_EXEC_CONSTANTS_REL_SURFACE)
return -EINVAL;
/* The HW changed the meaning on this bit on gen6 */
if (INTEL_INFO(dev)->gen >= 6)
mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
}
break;
default:
DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
return -EINVAL;
}
if (args->buffer_count < 1) {
DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
return -EINVAL;
}
if (args->num_cliprects != 0) {
if (ring != &dev_priv->ring[RCS]) {
DRM_DEBUG("clip rectangles are only valid with the render ring\n");
return -EINVAL;
}
if (INTEL_INFO(dev)->gen >= 5) {
DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
return -EINVAL;
}
if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
DRM_DEBUG("execbuf with %u cliprects\n",
args->num_cliprects);
return -EINVAL;
}
cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
GFP_KERNEL);
if (cliprects == NULL) {
ret = -ENOMEM;
goto pre_mutex_err;
}
if (copy_from_user(cliprects,
to_user_ptr(args->cliprects_ptr),
sizeof(*cliprects)*args->num_cliprects)) {
ret = -EFAULT;
goto pre_mutex_err;
}
}
ret = i915_mutex_lock_interruptible(dev);
if (ret)
goto pre_mutex_err;
if (dev_priv->ums.mm_suspended) {
mutex_unlock(&dev->struct_mutex);
ret = -EBUSY;
goto pre_mutex_err;
}
eb = eb_create(args);
if (eb == NULL) {
mutex_unlock(&dev->struct_mutex);
ret = -ENOMEM;
goto pre_mutex_err;
}
/* Look up object handles */
ret = eb_lookup_objects(eb, exec, args, file);
if (ret)
goto err;
/* take note of the batch buffer before we might reorder the lists */
batch_obj = list_entry(eb->objects.prev,
struct drm_i915_gem_object,
exec_list);
/* Move the objects en-masse into the GTT, evicting if necessary. */
need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
ret = i915_gem_execbuffer_reserve(ring, &eb->objects, vm, &need_relocs);
if (ret)
goto err;
/* The objects are in their final locations, apply the relocations. */
if (need_relocs)
ret = i915_gem_execbuffer_relocate(eb, vm);
if (ret) {
if (ret == -EFAULT) {
ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
eb, exec, vm);
BUG_ON(!mutex_is_locked(&dev->struct_mutex));
}
if (ret)
goto err;
}
/* Set the pending read domains for the batch buffer to COMMAND */
if (batch_obj->base.pending_write_domain) {
DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
ret = -EINVAL;
goto err;
}
batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but let's be paranoid and do it
* unconditionally for now. */
if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects);
if (ret)
goto err;
ret = i915_switch_context(ring, file, ctx_id);
if (ret)
goto err;
if (ring == &dev_priv->ring[RCS] &&
mode != dev_priv->relative_constants_mode) {
ret = intel_ring_begin(ring, 4);
if (ret)
goto err;
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit(ring, INSTPM);
intel_ring_emit(ring, mask << 16 | mode);
intel_ring_advance(ring);
dev_priv->relative_constants_mode = mode;
}
if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
ret = i915_reset_gen7_sol_offsets(dev, ring);
if (ret)
goto err;
}
exec_start = i915_gem_obj_offset(batch_obj, vm) +
args->batch_start_offset;
exec_len = args->batch_len;
if (cliprects) {
for (i = 0; i < args->num_cliprects; i++) {
ret = i915_emit_box(dev, &cliprects[i],
args->DR1, args->DR4);
if (ret)
goto err;
ret = ring->dispatch_execbuffer(ring,
exec_start, exec_len,
flags);
if (ret)
goto err;
}
} else {
ret = ring->dispatch_execbuffer(ring,
exec_start, exec_len,
flags);
if (ret)
goto err;
}
drm/i915: Preallocate next seqno before touching the ring Based on the work by Mika Kuoppala, we realised that we need to handle seqno wraparound prior to committing our changes to the ring. The most obvious point then is to grab the seqno inside intel_ring_begin(), and then to reuse that seqno for all ring operations until the next request. As intel_ring_begin() can fail, the callers must already be prepared to handle such failure and so we can safely add further checks. This patch looks like it should be split up into the interface changes and the tweaks to move seqno wrapping from the execbuffer into the core seqno increment. However, I found no easy way to break it into incremental steps without introducing further broken behaviour. v2: Mika found a silly mistake and a subtle error in the existing code; inside i915_gem_retire_requests() we were resetting the sync_seqno of the target ring based on the seqno from this ring - which are only related by the order of their allocation, not retirement. Hence we were applying the optimisation that the rings were synchronised too early, fortunately the only real casualty there is the handling of seqno wrapping. v3: Do not forget to reset the sync_seqno upon module reinitialisation, ala resume. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=863861 Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> [v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-11-27 23:22:52 +07:00
trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
i915_gem_execbuffer_move_to_active(&eb->objects, vm, ring);
i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
err:
eb_destroy(eb);
mutex_unlock(&dev->struct_mutex);
pre_mutex_err:
kfree(cliprects);
return ret;
}
/*
* Legacy execbuffer just creates an exec2 list from the original exec object
* list array and passes it to the real function.
*/
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_execbuffer *args = data;
struct drm_i915_gem_execbuffer2 exec2;
struct drm_i915_gem_exec_object *exec_list = NULL;
struct drm_i915_gem_exec_object2 *exec2_list = NULL;
int ret, i;
if (args->buffer_count < 1) {
DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
return -EINVAL;
}
/* Copy in the exec list from userland */
exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
if (exec_list == NULL || exec2_list == NULL) {
DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
args->buffer_count);
drm_free_large(exec_list);
drm_free_large(exec2_list);
return -ENOMEM;
}
ret = copy_from_user(exec_list,
to_user_ptr(args->buffers_ptr),
sizeof(*exec_list) * args->buffer_count);
if (ret != 0) {
DRM_DEBUG("copy %d exec entries failed %d\n",
args->buffer_count, ret);
drm_free_large(exec_list);
drm_free_large(exec2_list);
return -EFAULT;
}
for (i = 0; i < args->buffer_count; i++) {
exec2_list[i].handle = exec_list[i].handle;
exec2_list[i].relocation_count = exec_list[i].relocation_count;
exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
exec2_list[i].alignment = exec_list[i].alignment;
exec2_list[i].offset = exec_list[i].offset;
if (INTEL_INFO(dev)->gen < 4)
exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
else
exec2_list[i].flags = 0;
}
exec2.buffers_ptr = args->buffers_ptr;
exec2.buffer_count = args->buffer_count;
exec2.batch_start_offset = args->batch_start_offset;
exec2.batch_len = args->batch_len;
exec2.DR1 = args->DR1;
exec2.DR4 = args->DR4;
exec2.num_cliprects = args->num_cliprects;
exec2.cliprects_ptr = args->cliprects_ptr;
exec2.flags = I915_EXEC_RENDER;
i915_execbuffer2_set_context_id(exec2, 0);
ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list,
&dev_priv->gtt.base);
if (!ret) {
/* Copy the new buffer offsets back to the user's exec list. */
for (i = 0; i < args->buffer_count; i++)
exec_list[i].offset = exec2_list[i].offset;
/* ... and back out to userspace */
ret = copy_to_user(to_user_ptr(args->buffers_ptr),
exec_list,
sizeof(*exec_list) * args->buffer_count);
if (ret) {
ret = -EFAULT;
DRM_DEBUG("failed to copy %d exec entries "
"back to user (%d)\n",
args->buffer_count, ret);
}
}
drm_free_large(exec_list);
drm_free_large(exec2_list);
return ret;
}
int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_execbuffer2 *args = data;
struct drm_i915_gem_exec_object2 *exec2_list = NULL;
int ret;
if (args->buffer_count < 1 ||
args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
return -EINVAL;
}
exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
if (exec2_list == NULL)
exec2_list = drm_malloc_ab(sizeof(*exec2_list),
args->buffer_count);
if (exec2_list == NULL) {
DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
args->buffer_count);
return -ENOMEM;
}
ret = copy_from_user(exec2_list,
to_user_ptr(args->buffers_ptr),
sizeof(*exec2_list) * args->buffer_count);
if (ret != 0) {
DRM_DEBUG("copy %d exec entries failed %d\n",
args->buffer_count, ret);
drm_free_large(exec2_list);
return -EFAULT;
}
ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list,
&dev_priv->gtt.base);
if (!ret) {
/* Copy the new buffer offsets back to the user's exec list. */
ret = copy_to_user(to_user_ptr(args->buffers_ptr),
exec2_list,
sizeof(*exec2_list) * args->buffer_count);
if (ret) {
ret = -EFAULT;
DRM_DEBUG("failed to copy %d exec entries "
"back to user (%d)\n",
args->buffer_count, ret);
}
}
drm_free_large(exec2_list);
return ret;
}