drm/i915: Avoid using mappable space for relocation processing through the CPU

We try to avoid writing the relocations through the uncached GTT if the
buffer is currently in the CPU write domain and so will be flushed out to
main memory afterwards anyway. Also, on SandyBridge we can safely write
to the pages in cacheable memory, so long as the buffer is LLC mapped.
In either of these cases, we therefore do not need to force the
reallocation of the buffer into the mappable region of the GTT, reducing
the aperture pressure.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Chris Wilson authored 2012-03-26 10:10:27 +02:00, committed by Daniel Vetter
commit dabdfe021a (parent d1dd20a965)
3 changed files with 28 additions and 14 deletions
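
As a quick orientation before the diffs, here is a stand-alone sketch of the decision logic this patch introduces (use_cpu_reloc() and need_reloc_mappable() in the execbuffer changes below). It is plain user-space C, not kernel code: struct fake_obj and the DOMAIN_CPU/CACHE_* constants are simplified stand-ins for the drm_i915_gem_object fields the real helpers inspect.

/*
 * Model of the relocation-path choice: a buffer that is in the CPU write
 * domain, or that is cached (LLC mapped), gets its relocations written
 * through a CPU kmap; only the remaining GTT-path relocations still force
 * the object into the mappable aperture.
 */
#include <stdbool.h>
#include <stdio.h>

enum cache_level { CACHE_NONE, CACHE_LLC };   /* stand-in for obj->cache_level */
#define DOMAIN_CPU 0x1                        /* stand-in for I915_GEM_DOMAIN_CPU */

struct fake_obj {
        unsigned int write_domain;
        enum cache_level cache_level;
        int relocation_count;
};

/* Mirrors use_cpu_reloc(): CPU write domain or LLC-cached => CPU path. */
static bool use_cpu_reloc(const struct fake_obj *obj)
{
        return obj->write_domain == DOMAIN_CPU ||
               obj->cache_level != CACHE_NONE;
}

/* Mirrors need_reloc_mappable(): only GTT-path relocations need mappable space. */
static bool need_reloc_mappable(const struct fake_obj *obj)
{
        return obj->relocation_count && !use_cpu_reloc(obj);
}

int main(void)
{
        struct fake_obj llc = { 0, CACHE_LLC, 4 };       /* e.g. an LLC-mapped buffer on SandyBridge */
        struct fake_obj uncached = { 0, CACHE_NONE, 4 };

        printf("LLC buffer:      cpu reloc=%d, needs mappable=%d\n",
               use_cpu_reloc(&llc), need_reloc_mappable(&llc));
        printf("uncached buffer: cpu reloc=%d, needs mappable=%d\n",
               use_cpu_reloc(&uncached), need_reloc_mappable(&uncached));
        return 0;
}

Running the sketch shows that the LLC-mapped (or CPU-domain) buffer no longer forces a mappable GTT binding for its relocations, while the uncached buffer with relocations still does, which is the aperture-pressure reduction described above.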

drivers/gpu/drm/i915/i915_drv.h

@@ -1266,6 +1266,8 @@ int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
                                   bool write);
 int __must_check
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
                                      u32 alignment,
                                      struct intel_ring_buffer *pipelined);

drivers/gpu/drm/i915/i915_gem.c

@@ -39,8 +39,6 @@
 static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
-static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
-                                                          bool write);
 static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
                                                                    uint64_t offset,
                                                                    uint64_t size);
@@ -3073,7 +3071,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
  * This function returns when the move is complete, including waiting on
  * flushes to occur.
  */
-static int
+int
 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 {
        uint32_t old_write_domain, old_read_domains;

drivers/gpu/drm/i915/i915_gem_execbuffer.c

@@ -266,6 +266,12 @@ eb_destroy(struct eb_objects *eb)
        kfree(eb);
 }

+static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
+{
+       return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
+               obj->cache_level != I915_CACHE_NONE);
+}
+
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
                                    struct eb_objects *eb,
@@ -354,11 +360,19 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
                return ret;
        }

+       /* We can't wait for rendering with pagefaults disabled */
+       if (obj->active && in_atomic())
+               return -EFAULT;
+
        reloc->delta += target_offset;
-       if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
+       if (use_cpu_reloc(obj)) {
                uint32_t page_offset = reloc->offset & ~PAGE_MASK;
                char *vaddr;

+               ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+               if (ret)
+                       return ret;
+
                vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
                *(uint32_t *)(vaddr + page_offset) = reloc->delta;
                kunmap_atomic(vaddr);
@@ -367,10 +381,6 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
                uint32_t __iomem *reloc_entry;
                void __iomem *reloc_page;

-               /* We can't wait for rendering with pagefaults disabled */
-               if (obj->active && in_atomic())
-                       return -EFAULT;
-
                ret = i915_gem_object_set_to_gtt_domain(obj, 1);
                if (ret)
                        return ret;
@@ -492,6 +502,13 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,

 #define __EXEC_OBJECT_HAS_FENCE (1<<31)

+static int
+need_reloc_mappable(struct drm_i915_gem_object *obj)
+{
+       struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
+       return entry->relocation_count && !use_cpu_reloc(obj);
+}
+
 static int
 pin_and_fence_object(struct drm_i915_gem_object *obj,
                      struct intel_ring_buffer *ring)
@@ -505,8 +522,7 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
                has_fenced_gpu_access &&
                entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
                obj->tiling_mode != I915_TILING_NONE;
-       need_mappable =
-               entry->relocation_count ? true : need_fence;
+       need_mappable = need_fence || need_reloc_mappable(obj);

        ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
        if (ret)
@@ -563,8 +579,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
                        has_fenced_gpu_access &&
                        entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
                        obj->tiling_mode != I915_TILING_NONE;
-               need_mappable =
-                       entry->relocation_count ? true : need_fence;
+               need_mappable = need_fence || need_reloc_mappable(obj);

                if (need_mappable)
                        list_move(&obj->exec_list, &ordered_objects);
@@ -604,8 +619,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
                        has_fenced_gpu_access &&
                        entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
                        obj->tiling_mode != I915_TILING_NONE;
-               need_mappable =
-                       entry->relocation_count ? true : need_fence;
+               need_mappable = need_fence || need_reloc_mappable(obj);

                if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
                    (need_mappable && !obj->map_and_fenceable))