2010-11-06 04:23:30 +07:00
|
|
|
/*
|
|
|
|
* Copyright © 2010 Daniel Vetter
|
2014-02-20 13:05:47 +07:00
|
|
|
* Copyright © 2011-2014 Intel Corporation
|
2010-11-06 04:23:30 +07:00
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
* Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2017-02-14 00:15:44 +07:00
|
|
|
#include <linux/slab.h> /* fault-inject.h is not standalone! */
|
|
|
|
|
|
|
|
#include <linux/fault-inject.h>
|
2017-01-11 18:23:10 +07:00
|
|
|
#include <linux/log2.h>
|
2017-01-11 18:23:12 +07:00
|
|
|
#include <linux/random.h>
|
2014-01-08 22:10:27 +07:00
|
|
|
#include <linux/seq_file.h>
|
2015-10-24 00:43:32 +07:00
|
|
|
#include <linux/stop_machine.h>
|
2017-01-11 18:23:10 +07:00
|
|
|
|
2017-05-09 05:58:17 +07:00
|
|
|
#include <asm/set_memory.h>
|
2019-08-21 16:39:05 +07:00
|
|
|
#include <asm/smp.h>
|
2017-05-09 05:58:17 +07:00
|
|
|
|
2012-10-03 00:01:07 +07:00
|
|
|
#include <drm/i915_drm.h>
|
2017-01-11 18:23:10 +07:00
|
|
|
|
2019-06-13 15:44:16 +07:00
|
|
|
#include "display/intel_frontbuffer.h"
|
2019-06-21 14:07:44 +07:00
|
|
|
#include "gt/intel_gt.h"
|
2019-10-04 20:40:06 +07:00
|
|
|
#include "gt/intel_gt_requests.h"
|
2019-06-13 15:44:16 +07:00
|
|
|
|
2010-11-06 04:23:30 +07:00
|
|
|
#include "i915_drv.h"
|
2019-05-28 16:29:50 +07:00
|
|
|
#include "i915_scatterlist.h"
|
2010-11-06 04:23:30 +07:00
|
|
|
#include "i915_trace.h"
|
2019-05-28 16:29:50 +07:00
|
|
|
#include "i915_vgpu.h"
|
2010-11-06 04:23:30 +07:00
|
|
|
|
2018-05-22 15:36:43 +07:00
|
|
|
/*
 * GFP mask for allocations that are permitted to fail: GFP_KERNEL combined
 * with __GFP_RETRY_MAYFAIL and __GFP_NOWARN.
 */
#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
|
2016-08-22 14:44:31 +07:00
|
|
|
|
2019-07-12 18:27:23 +07:00
|
|
|
/*
 * DBG() - GTT debug tracing hook.
 *
 * Expands to nothing unless CONFIG_DRM_I915_TRACE_GTT is enabled, in which
 * case the arguments are forwarded to trace_printk().
 */
#if !IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
#define DBG(...)
#else
#define DBG(...) trace_printk(__VA_ARGS__)
#endif
|
|
|
|
|
2014-12-11 00:27:59 +07:00
|
|
|
/**
|
|
|
|
* DOC: Global GTT views
|
|
|
|
*
|
|
|
|
* Background and previous state
|
|
|
|
*
|
|
|
|
* Historically objects could exist (be bound) in global GTT space only as
|
|
|
|
* singular instances with a view representing all of the object's backing pages
|
|
|
|
* in a linear fashion. This view will be called a normal view.
|
|
|
|
*
|
|
|
|
* To support multiple views of the same object, where the number of mapped
|
|
|
|
* pages is not equal to the backing store, or where the layout of the pages
|
|
|
|
* is not linear, the concept of a GGTT view was added.
|
|
|
|
*
|
|
|
|
* One example of an alternative view is a stereo display driven by a single
|
|
|
|
* image. In this case we would have a framebuffer looking like this
|
|
|
|
* (2x2 pages):
|
|
|
|
*
|
|
|
|
* 12
|
|
|
|
* 34
|
|
|
|
*
|
|
|
|
* Above would represent a normal GGTT view as normally mapped for GPU or CPU
|
|
|
|
* rendering. In contrast, fed to the display engine would be an alternative
|
|
|
|
* view which could look something like this:
|
|
|
|
*
|
|
|
|
* 1212
|
|
|
|
* 3434
|
|
|
|
*
|
|
|
|
* In this example both the size and layout of pages in the alternative view is
|
|
|
|
* different from the normal view.
|
|
|
|
*
|
|
|
|
* Implementation and usage
|
|
|
|
*
|
|
|
|
* GGTT views are implemented using VMAs and are distinguished via enum
|
|
|
|
* i915_ggtt_view_type and struct i915_ggtt_view.
|
|
|
|
*
|
|
|
|
* A new flavour of core GEM functions which work with GGTT bound objects were
|
2015-03-16 19:11:13 +07:00
|
|
|
* added with the _ggtt_ infix, and sometimes with _view postfix to avoid
|
|
|
|
* renaming in large amounts of code. They take the struct i915_ggtt_view
|
|
|
|
* parameter encapsulating all metadata required to implement a view.
|
2014-12-11 00:27:59 +07:00
|
|
|
*
|
|
|
|
* As a helper for callers which are only interested in the normal view,
|
|
|
|
* globally const i915_ggtt_view_normal singleton instance exists. All old core
|
|
|
|
* GEM API functions, the ones not taking the view parameter, are operating on,
|
|
|
|
* or with the normal GGTT view.
|
|
|
|
*
|
|
|
|
* Code wanting to add or use a new GGTT view needs to:
|
|
|
|
*
|
|
|
|
* 1. Add a new enum with a suitable name.
|
|
|
|
* 2. Extend the metadata in the i915_ggtt_view structure if required.
|
|
|
|
* 3. Add support to i915_get_vma_pages().
|
|
|
|
*
|
|
|
|
* New views are required to build a scatter-gather table from within the
|
|
|
|
* i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
|
|
|
|
* exists for the lifetime of a VMA.
|
|
|
|
*
|
|
|
|
* Core API is designed to have copy semantics which means that passed in
|
|
|
|
* struct i915_ggtt_view does not need to be persistent (left around after
|
|
|
|
* calling the core API functions).
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2019-07-12 16:43:22 +07:00
|
|
|
/*
 * Recover the enclosing struct i915_page_directory from a pointer to its
 * embedded 'pt' member.
 */
#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
|
|
|
|
|
2015-04-14 22:35:27 +07:00
|
|
|
/* Forward declaration; the definition is not in this part of the file. */
static int i915_get_ggtt_vma_pages(struct i915_vma *vma);
|
|
|
|
|
2019-06-21 14:07:58 +07:00
|
|
|
static void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
|
2017-01-12 18:00:49 +07:00
|
|
|
{
|
2019-08-16 15:31:43 +07:00
|
|
|
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
|
2019-06-04 19:00:22 +07:00
|
|
|
|
2018-05-08 19:41:54 +07:00
|
|
|
/*
|
|
|
|
* Note that as an uncached mmio write, this will flush the
|
2017-01-12 18:00:49 +07:00
|
|
|
* WCB of the writes into the GGTT before it triggers the invalidate.
|
|
|
|
*/
|
2019-06-04 19:00:22 +07:00
|
|
|
intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
|
2017-01-12 18:00:49 +07:00
|
|
|
}
|
|
|
|
|
2019-06-21 14:07:58 +07:00
|
|
|
static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
|
2017-01-12 18:00:49 +07:00
|
|
|
{
|
2019-08-16 15:31:43 +07:00
|
|
|
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
|
2019-08-23 15:20:33 +07:00
|
|
|
struct drm_i915_private *i915 = ggtt->vm.i915;
|
2019-06-04 19:00:22 +07:00
|
|
|
|
2019-06-21 14:07:58 +07:00
|
|
|
gen6_ggtt_invalidate(ggtt);
|
2019-08-23 15:20:33 +07:00
|
|
|
|
|
|
|
if (INTEL_GEN(i915) >= 12)
|
|
|
|
intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
|
|
|
|
GEN12_GUC_TLB_INV_CR_INVALIDATE);
|
|
|
|
else
|
|
|
|
intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
|
2017-01-12 18:00:49 +07:00
|
|
|
}
|
|
|
|
|
2019-06-21 14:07:58 +07:00
|
|
|
/*
 * GGTT invalidate for the GMCH path: simply delegates to
 * intel_gtt_chipset_flush(). @ggtt itself is not consulted.
 */
static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	intel_gtt_chipset_flush();
}
|
|
|
|
|
2018-06-14 20:43:14 +07:00
|
|
|
static int ppgtt_bind_vma(struct i915_vma *vma,
|
|
|
|
enum i915_cache_level cache_level,
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
u32 flags)
|
2015-04-14 22:35:24 +07:00
|
|
|
{
|
2017-02-15 15:43:42 +07:00
|
|
|
u32 pte_flags;
|
2018-06-14 20:43:14 +07:00
|
|
|
int err;
|
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
if (flags & I915_VMA_ALLOC) {
|
2018-06-14 20:43:14 +07:00
|
|
|
err = vma->vm->allocate_va_range(vma->vm,
|
|
|
|
vma->node.start, vma->size);
|
|
|
|
if (err)
|
|
|
|
return err;
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
|
|
|
|
set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
|
2018-06-14 20:43:14 +07:00
|
|
|
}
|
2015-04-14 22:35:24 +07:00
|
|
|
|
2018-07-13 01:53:11 +07:00
|
|
|
/* Applicable to VLV, and gen8+ */
|
2017-02-15 15:43:42 +07:00
|
|
|
pte_flags = 0;
|
2018-07-13 01:53:13 +07:00
|
|
|
if (i915_gem_object_is_readonly(vma->obj))
|
2015-04-14 22:35:24 +07:00
|
|
|
pte_flags |= PTE_READ_ONLY;
|
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)));
|
2017-06-22 16:58:36 +07:00
|
|
|
vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
|
2019-08-23 21:14:21 +07:00
|
|
|
wmb();
|
2015-04-14 22:35:27 +07:00
|
|
|
|
|
|
|
return 0;
|
2015-04-14 22:35:24 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static void ppgtt_unbind_vma(struct i915_vma *vma)
|
|
|
|
{
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
|
|
|
|
vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
|
2015-04-14 22:35:24 +07:00
|
|
|
}
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 05:10:56 +07:00
|
|
|
|
2017-10-07 05:18:19 +07:00
|
|
|
static int ppgtt_set_pages(struct i915_vma *vma)
|
|
|
|
{
|
|
|
|
GEM_BUG_ON(vma->pages);
|
|
|
|
|
|
|
|
vma->pages = vma->obj->mm.pages;
|
|
|
|
|
2017-10-07 05:18:20 +07:00
|
|
|
vma->page_sizes = vma->obj->mm.page_sizes;
|
|
|
|
|
2017-10-07 05:18:19 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void clear_pages(struct i915_vma *vma)
|
|
|
|
{
|
|
|
|
GEM_BUG_ON(!vma->pages);
|
|
|
|
|
|
|
|
if (vma->pages != vma->obj->mm.pages) {
|
|
|
|
sg_free_table(vma->pages);
|
|
|
|
kfree(vma->pages);
|
|
|
|
}
|
|
|
|
vma->pages = NULL;
|
2017-10-07 05:18:20 +07:00
|
|
|
|
|
|
|
memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
|
2017-10-07 05:18:19 +07:00
|
|
|
}
|
|
|
|
|
2018-10-30 01:27:20 +07:00
|
|
|
static u64 gen8_pte_encode(dma_addr_t addr,
|
|
|
|
enum i915_cache_level level,
|
|
|
|
u32 flags)
|
2013-11-03 11:07:18 +07:00
|
|
|
{
|
2018-07-13 01:53:10 +07:00
|
|
|
gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
|
|
|
|
|
|
|
|
if (unlikely(flags & PTE_READ_ONLY))
|
|
|
|
pte &= ~_PAGE_RW;
|
2014-04-19 04:04:27 +07:00
|
|
|
|
|
|
|
switch (level) {
|
|
|
|
case I915_CACHE_NONE:
|
2017-09-14 19:39:41 +07:00
|
|
|
pte |= PPAT_UNCACHED;
|
2014-04-19 04:04:27 +07:00
|
|
|
break;
|
|
|
|
case I915_CACHE_WT:
|
2017-09-14 19:39:41 +07:00
|
|
|
pte |= PPAT_DISPLAY_ELLC;
|
2014-04-19 04:04:27 +07:00
|
|
|
break;
|
|
|
|
default:
|
2017-09-14 19:39:41 +07:00
|
|
|
pte |= PPAT_CACHED;
|
2014-04-19 04:04:27 +07:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2013-11-03 11:07:18 +07:00
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2019-07-06 04:52:01 +07:00
|
|
|
static u64 gen8_pde_encode(const dma_addr_t addr,
|
|
|
|
const enum i915_cache_level level)
|
2013-11-05 12:20:14 +07:00
|
|
|
{
|
2019-07-06 04:52:01 +07:00
|
|
|
u64 pde = _PAGE_PRESENT | _PAGE_RW;
|
2013-11-05 12:20:14 +07:00
|
|
|
pde |= addr;
|
|
|
|
if (level != I915_CACHE_NONE)
|
2017-09-14 19:39:41 +07:00
|
|
|
pde |= PPAT_CACHED_PDE;
|
2013-11-05 12:20:14 +07:00
|
|
|
else
|
2017-09-14 19:39:41 +07:00
|
|
|
pde |= PPAT_UNCACHED;
|
2013-11-05 12:20:14 +07:00
|
|
|
return pde;
|
|
|
|
}
|
|
|
|
|
2018-10-30 01:27:20 +07:00
|
|
|
static u64 snb_pte_encode(dma_addr_t addr,
|
|
|
|
enum i915_cache_level level,
|
|
|
|
u32 flags)
|
2012-09-25 06:44:32 +07:00
|
|
|
{
|
2016-10-13 19:02:40 +07:00
|
|
|
gen6_pte_t pte = GEN6_PTE_VALID;
|
2012-09-25 06:44:32 +07:00
|
|
|
pte |= GEN6_PTE_ADDR_ENCODE(addr);
|
2012-10-19 23:33:22 +07:00
|
|
|
|
|
|
|
switch (level) {
|
2013-08-06 19:17:02 +07:00
|
|
|
case I915_CACHE_L3_LLC:
|
|
|
|
case I915_CACHE_LLC:
|
|
|
|
pte |= GEN6_PTE_CACHE_LLC;
|
|
|
|
break;
|
|
|
|
case I915_CACHE_NONE:
|
|
|
|
pte |= GEN6_PTE_UNCACHED;
|
|
|
|
break;
|
|
|
|
default:
|
2014-12-08 22:40:10 +07:00
|
|
|
MISSING_CASE(level);
|
2013-08-06 19:17:02 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2018-10-30 01:27:20 +07:00
|
|
|
static u64 ivb_pte_encode(dma_addr_t addr,
|
|
|
|
enum i915_cache_level level,
|
|
|
|
u32 flags)
|
2013-08-06 19:17:02 +07:00
|
|
|
{
|
2016-10-13 19:02:40 +07:00
|
|
|
gen6_pte_t pte = GEN6_PTE_VALID;
|
2013-08-06 19:17:02 +07:00
|
|
|
pte |= GEN6_PTE_ADDR_ENCODE(addr);
|
|
|
|
|
|
|
|
switch (level) {
|
|
|
|
case I915_CACHE_L3_LLC:
|
|
|
|
pte |= GEN7_PTE_CACHE_L3_LLC;
|
2012-10-19 23:33:22 +07:00
|
|
|
break;
|
|
|
|
case I915_CACHE_LLC:
|
|
|
|
pte |= GEN6_PTE_CACHE_LLC;
|
|
|
|
break;
|
|
|
|
case I915_CACHE_NONE:
|
2013-04-22 14:53:51 +07:00
|
|
|
pte |= GEN6_PTE_UNCACHED;
|
2012-10-19 23:33:22 +07:00
|
|
|
break;
|
|
|
|
default:
|
2014-12-08 22:40:10 +07:00
|
|
|
MISSING_CASE(level);
|
2012-10-19 23:33:22 +07:00
|
|
|
}
|
|
|
|
|
2012-09-25 06:44:32 +07:00
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2018-10-30 01:27:20 +07:00
|
|
|
static u64 byt_pte_encode(dma_addr_t addr,
|
|
|
|
enum i915_cache_level level,
|
|
|
|
u32 flags)
|
2013-04-22 14:53:50 +07:00
|
|
|
{
|
2016-10-13 19:02:40 +07:00
|
|
|
gen6_pte_t pte = GEN6_PTE_VALID;
|
2013-04-22 14:53:50 +07:00
|
|
|
pte |= GEN6_PTE_ADDR_ENCODE(addr);
|
|
|
|
|
2014-06-17 12:29:42 +07:00
|
|
|
if (!(flags & PTE_READ_ONLY))
|
|
|
|
pte |= BYT_PTE_WRITEABLE;
|
2013-04-22 14:53:50 +07:00
|
|
|
|
|
|
|
if (level != I915_CACHE_NONE)
|
|
|
|
pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
|
|
|
|
|
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2018-10-30 01:27:20 +07:00
|
|
|
static u64 hsw_pte_encode(dma_addr_t addr,
|
|
|
|
enum i915_cache_level level,
|
|
|
|
u32 flags)
|
2013-04-22 14:53:51 +07:00
|
|
|
{
|
2016-10-13 19:02:40 +07:00
|
|
|
gen6_pte_t pte = GEN6_PTE_VALID;
|
2013-07-05 01:02:03 +07:00
|
|
|
pte |= HSW_PTE_ADDR_ENCODE(addr);
|
2013-04-22 14:53:51 +07:00
|
|
|
|
|
|
|
if (level != I915_CACHE_NONE)
|
2013-08-05 13:47:29 +07:00
|
|
|
pte |= HSW_WB_LLC_AGE3;
|
2013-04-22 14:53:51 +07:00
|
|
|
|
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2018-10-30 01:27:20 +07:00
|
|
|
static u64 iris_pte_encode(dma_addr_t addr,
|
|
|
|
enum i915_cache_level level,
|
|
|
|
u32 flags)
|
2013-07-05 01:02:06 +07:00
|
|
|
{
|
2016-10-13 19:02:40 +07:00
|
|
|
gen6_pte_t pte = GEN6_PTE_VALID;
|
2013-07-05 01:02:06 +07:00
|
|
|
pte |= HSW_PTE_ADDR_ENCODE(addr);
|
|
|
|
|
2013-08-08 20:41:10 +07:00
|
|
|
switch (level) {
|
|
|
|
case I915_CACHE_NONE:
|
|
|
|
break;
|
|
|
|
case I915_CACHE_WT:
|
2013-11-22 17:37:53 +07:00
|
|
|
pte |= HSW_WT_ELLC_LLC_AGE3;
|
2013-08-08 20:41:10 +07:00
|
|
|
break;
|
|
|
|
default:
|
2013-11-22 17:37:53 +07:00
|
|
|
pte |= HSW_WB_ELLC_LLC_AGE3;
|
2013-08-08 20:41:10 +07:00
|
|
|
break;
|
|
|
|
}
|
2013-07-05 01:02:06 +07:00
|
|
|
|
|
|
|
return pte;
|
|
|
|
}
|
|
|
|
|
2018-07-05 01:55:18 +07:00
|
|
|
static void stash_init(struct pagestash *stash)
|
|
|
|
{
|
|
|
|
pagevec_init(&stash->pvec);
|
|
|
|
spin_lock_init(&stash->lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct page *stash_pop_page(struct pagestash *stash)
|
|
|
|
{
|
|
|
|
struct page *page = NULL;
|
|
|
|
|
|
|
|
spin_lock(&stash->lock);
|
|
|
|
if (likely(stash->pvec.nr))
|
|
|
|
page = stash->pvec.pages[--stash->pvec.nr];
|
|
|
|
spin_unlock(&stash->lock);
|
|
|
|
|
|
|
|
return page;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
|
|
|
|
{
|
2019-05-29 16:34:07 +07:00
|
|
|
unsigned int nr;
|
2018-07-05 01:55:18 +07:00
|
|
|
|
|
|
|
spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
|
|
|
|
|
2019-05-29 16:34:07 +07:00
|
|
|
nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
|
2018-07-05 01:55:18 +07:00
|
|
|
memcpy(stash->pvec.pages + stash->pvec.nr,
|
|
|
|
pvec->pages + pvec->nr - nr,
|
|
|
|
sizeof(pvec->pages[0]) * nr);
|
|
|
|
stash->pvec.nr += nr;
|
|
|
|
|
|
|
|
spin_unlock(&stash->lock);
|
|
|
|
|
|
|
|
pvec->nr -= nr;
|
|
|
|
}
|
|
|
|
|
2017-02-15 15:43:40 +07:00
|
|
|
/*
 * Obtain one page for use by @vm.
 *
 * Sources are tried in this order:
 *  1. the VM's own free_pages stash;
 *  2. if the VM does not use WC page-table mappings (!vm->pt_kmap_wc), a
 *     plain alloc_page(@gfp);
 *  3. the device-wide WC stash (i915->mm.wc_stash);
 *  4. a freshly allocated batch that is converted to WC in one go, with the
 *     spare pages fed back into the global and then the local stash.
 *
 * Returns the page, or NULL if nothing could be allocated or the batch could
 * not be converted to WC.
 */
static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
{
	struct pagevec batch;
	struct page *page;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	page = stash_pop_page(&vm->free_pages);
	if (page)
		return page;

	if (!vm->pt_kmap_wc)
		return alloc_page(gfp);

	/* Look in our global stash of WC pages... */
	page = stash_pop_page(&vm->i915->mm.wc_stash);
	if (page)
		return page;

	/*
	 * Otherwise batch allocate pages to amortize cost of set_pages_wc.
	 *
	 * We have to be careful as page allocation may trigger the shrinker
	 * (via direct reclaim) which will fill up the WC stash underneath us.
	 * So we add our WB pages into a temporary pvec on the stack and merge
	 * them into the WC stash after all the allocations are complete.
	 */
	pagevec_init(&batch);
	do {
		/*
		 * Deliberately a separate variable: 'page' must stay NULL
		 * unless the WC conversion below succeeds.
		 */
		struct page *fresh = alloc_page(gfp);

		if (unlikely(!fresh))
			break;

		batch.pages[batch.nr++] = fresh;
	} while (pagevec_space(&batch));

	if (batch.nr && !set_pages_array_wc(batch.pages, batch.nr)) {
		/* Keep one page for the caller. */
		page = batch.pages[--batch.nr];

		/* Merge spare WC pages to the global stash */
		if (batch.nr)
			stash_push_pagevec(&vm->i915->mm.wc_stash, &batch);

		/* Push any surplus WC pages onto the local VM stash */
		if (batch.nr)
			stash_push_pagevec(&vm->free_pages, &batch);
	}

	/* Return unwanted leftovers */
	if (unlikely(batch.nr)) {
		WARN_ON_ONCE(set_pages_array_wb(batch.pages, batch.nr));
		__pagevec_release(&batch);
	}

	return page;
}
|
|
|
|
|
2017-08-23 00:38:28 +07:00
|
|
|
static void vm_free_pages_release(struct i915_address_space *vm,
|
|
|
|
bool immediate)
|
2017-02-15 15:43:40 +07:00
|
|
|
{
|
2018-07-05 01:55:18 +07:00
|
|
|
struct pagevec *pvec = &vm->free_pages.pvec;
|
|
|
|
struct pagevec stack;
|
2017-08-23 00:38:28 +07:00
|
|
|
|
2018-07-05 01:55:18 +07:00
|
|
|
lockdep_assert_held(&vm->free_pages.lock);
|
2017-08-23 00:38:28 +07:00
|
|
|
GEM_BUG_ON(!pagevec_count(pvec));
|
2017-02-15 15:43:40 +07:00
|
|
|
|
2017-08-23 00:38:28 +07:00
|
|
|
if (vm->pt_kmap_wc) {
|
2018-07-05 01:55:18 +07:00
|
|
|
/*
|
|
|
|
* When we use WC, first fill up the global stash and then
|
2017-08-23 00:38:28 +07:00
|
|
|
* only if full immediately free the overflow.
|
|
|
|
*/
|
2018-07-05 01:55:18 +07:00
|
|
|
stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
|
2017-02-15 15:43:40 +07:00
|
|
|
|
2018-07-05 01:55:18 +07:00
|
|
|
/*
|
|
|
|
* As we have made some room in the VM's free_pages,
|
|
|
|
* we can wait for it to fill again. Unless we are
|
|
|
|
* inside i915_address_space_fini() and must
|
|
|
|
* immediately release the pages!
|
|
|
|
*/
|
|
|
|
if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
|
|
|
|
return;
|
2017-08-23 00:38:28 +07:00
|
|
|
|
2018-07-05 01:55:18 +07:00
|
|
|
/*
|
|
|
|
* We have to drop the lock to allow ourselves to sleep,
|
|
|
|
* so take a copy of the pvec and clear the stash for
|
|
|
|
* others to use it as we sleep.
|
|
|
|
*/
|
|
|
|
stack = *pvec;
|
|
|
|
pagevec_reinit(pvec);
|
|
|
|
spin_unlock(&vm->free_pages.lock);
|
|
|
|
|
|
|
|
pvec = &stack;
|
2017-08-23 00:38:28 +07:00
|
|
|
set_pages_array_wb(pvec->pages, pvec->nr);
|
2018-07-05 01:55:18 +07:00
|
|
|
|
|
|
|
spin_lock(&vm->free_pages.lock);
|
2017-08-23 00:38:28 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
__pagevec_release(pvec);
|
2017-02-15 15:43:40 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static void vm_free_page(struct i915_address_space *vm, struct page *page)
|
|
|
|
{
|
2017-11-10 04:34:49 +07:00
|
|
|
/*
|
|
|
|
* On !llc, we need to change the pages back to WB. We only do so
|
|
|
|
* in bulk, so we rarely need to change the page attributes here,
|
|
|
|
* but doing so requires a stop_machine() from deep inside arch/x86/mm.
|
|
|
|
* To make detection of the possible sleep more likely, use an
|
|
|
|
* unconditional might_sleep() for everybody.
|
|
|
|
*/
|
|
|
|
might_sleep();
|
2018-07-05 01:55:18 +07:00
|
|
|
spin_lock(&vm->free_pages.lock);
|
2019-05-29 16:34:07 +07:00
|
|
|
while (!pagevec_space(&vm->free_pages.pvec))
|
2017-08-23 00:38:28 +07:00
|
|
|
vm_free_pages_release(vm, false);
|
2019-05-29 16:34:07 +07:00
|
|
|
GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
|
|
|
|
pagevec_add(&vm->free_pages.pvec, page);
|
2018-07-05 01:55:18 +07:00
|
|
|
spin_unlock(&vm->free_pages.lock);
|
|
|
|
}
|
|
|
|
|
2019-06-21 01:37:05 +07:00
|
|
|
static void i915_address_space_fini(struct i915_address_space *vm)
|
|
|
|
{
|
|
|
|
spin_lock(&vm->free_pages.lock);
|
|
|
|
if (pagevec_count(&vm->free_pages.pvec))
|
|
|
|
vm_free_pages_release(vm, true);
|
|
|
|
GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
|
|
|
|
spin_unlock(&vm->free_pages.lock);
|
|
|
|
|
|
|
|
drm_mm_takedown(&vm->mm);
|
|
|
|
|
|
|
|
mutex_destroy(&vm->mutex);
|
|
|
|
}
|
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to allocate and apply the PTE updates after we have
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is owned by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
void __i915_vm_close(struct i915_address_space *vm)
|
2019-06-21 01:37:05 +07:00
|
|
|
{
|
2019-10-04 20:39:56 +07:00
|
|
|
struct i915_vma *vma, *vn;
|
2019-06-21 01:37:05 +07:00
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
mutex_lock(&vm->mutex);
|
|
|
|
list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
|
|
|
|
struct drm_i915_gem_object *obj = vma->obj;
|
|
|
|
|
|
|
|
/* Keep the obj (and hence the vma) alive as _we_ destroy it */
|
|
|
|
if (!kref_get_unless_zero(&obj->base.refcount))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
|
|
|
|
WARN_ON(__i915_vma_unbind(vma));
|
2019-10-04 20:39:56 +07:00
|
|
|
i915_vma_destroy(vma);
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
|
|
|
|
i915_gem_object_put(obj);
|
|
|
|
}
|
2019-10-04 20:39:56 +07:00
|
|
|
GEM_BUG_ON(!list_empty(&vm->bound_list));
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
mutex_unlock(&vm->mutex);
|
2019-06-21 01:37:05 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static void __i915_vm_release(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct i915_address_space *vm =
|
|
|
|
container_of(work, struct i915_address_space, rcu.work);
|
|
|
|
|
|
|
|
vm->cleanup(vm);
|
|
|
|
i915_address_space_fini(vm);
|
|
|
|
|
|
|
|
kfree(vm);
|
|
|
|
}
|
|
|
|
|
|
|
|
void i915_vm_release(struct kref *kref)
|
|
|
|
{
|
|
|
|
struct i915_address_space *vm =
|
|
|
|
container_of(kref, struct i915_address_space, ref);
|
|
|
|
|
|
|
|
GEM_BUG_ON(i915_is_ggtt(vm));
|
|
|
|
trace_i915_ppgtt_release(vm);
|
|
|
|
|
|
|
|
queue_rcu_work(vm->i915->wq, &vm->rcu);
|
|
|
|
}
|
|
|
|
|
2019-01-15 04:59:56 +07:00
|
|
|
static void i915_address_space_init(struct i915_address_space *vm, int subclass)
|
2018-07-05 01:55:18 +07:00
|
|
|
{
|
2019-06-11 16:12:37 +07:00
|
|
|
kref_init(&vm->ref);
|
2019-06-21 01:37:05 +07:00
|
|
|
INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
atomic_set(&vm->open, 1);
|
2019-06-11 16:12:37 +07:00
|
|
|
|
2018-07-11 14:36:02 +07:00
|
|
|
/*
|
|
|
|
* The vm->mutex must be reclaim safe (for use in the shrinker).
|
|
|
|
* Do a dummy acquire now under fs_reclaim so that any allocation
|
|
|
|
* attempt holding the lock is immediately reported by lockdep.
|
|
|
|
*/
|
|
|
|
mutex_init(&vm->mutex);
|
2019-01-15 04:59:56 +07:00
|
|
|
lockdep_set_subclass(&vm->mutex, subclass);
|
2019-01-07 18:54:24 +07:00
|
|
|
i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
|
2018-07-11 14:36:02 +07:00
|
|
|
|
2018-07-05 01:55:18 +07:00
|
|
|
GEM_BUG_ON(!vm->total);
|
|
|
|
drm_mm_init(&vm->mm, 0, vm->total);
|
|
|
|
vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
|
|
|
|
|
|
|
|
stash_init(&vm->free_pages);
|
|
|
|
|
2019-01-28 17:23:52 +07:00
|
|
|
INIT_LIST_HEAD(&vm->bound_list);
|
2018-07-05 01:55:18 +07:00
|
|
|
}
|
|
|
|
|
2017-02-15 15:43:40 +07:00
|
|
|
static int __setup_page_dma(struct i915_address_space *vm,
|
|
|
|
struct i915_page_dma *p,
|
|
|
|
gfp_t gfp)
|
|
|
|
{
|
2018-05-22 15:36:43 +07:00
|
|
|
p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
|
2017-02-15 15:43:40 +07:00
|
|
|
if (unlikely(!p->page))
|
|
|
|
return -ENOMEM;
|
2015-03-16 23:00:56 +07:00
|
|
|
|
2018-07-06 19:26:10 +07:00
|
|
|
p->daddr = dma_map_page_attrs(vm->dma,
|
|
|
|
p->page, 0, PAGE_SIZE,
|
|
|
|
PCI_DMA_BIDIRECTIONAL,
|
2018-07-06 19:26:11 +07:00
|
|
|
DMA_ATTR_SKIP_CPU_SYNC |
|
2018-07-06 19:26:10 +07:00
|
|
|
DMA_ATTR_NO_WARN);
|
2017-02-15 15:43:40 +07:00
|
|
|
if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
|
|
|
|
vm_free_page(vm, p->page);
|
|
|
|
return -ENOMEM;
|
2015-06-25 22:35:07 +07:00
|
|
|
}
|
2015-03-25 00:06:33 +07:00
|
|
|
|
|
|
|
return 0;
|
2015-03-16 23:00:56 +07:00
|
|
|
}
|
|
|
|
|
2017-02-15 15:43:40 +07:00
|
|
|
static int setup_page_dma(struct i915_address_space *vm,
|
2016-11-16 15:55:34 +07:00
|
|
|
struct i915_page_dma *p)
|
2015-06-25 22:35:13 +07:00
|
|
|
{
|
2018-05-22 15:36:43 +07:00
|
|
|
return __setup_page_dma(vm, p, __GFP_HIGHMEM);
|
2015-06-25 22:35:13 +07:00
|
|
|
}
|
|
|
|
|
2017-02-15 15:43:40 +07:00
|
|
|
static void cleanup_page_dma(struct i915_address_space *vm,
|
2016-11-16 15:55:34 +07:00
|
|
|
struct i915_page_dma *p)
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-24 23:22:36 +07:00
|
|
|
{
|
2017-02-15 15:43:40 +07:00
|
|
|
dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
|
|
|
|
vm_free_page(vm, p->page);
|
2015-06-25 22:35:07 +07:00
|
|
|
}
|
|
|
|
|
2017-02-15 15:43:41 +07:00
|
|
|
/* Atomically kmap the backing page of @px, located via px_base(). */
#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
|
2015-06-25 22:35:11 +07:00
|
|
|
|
2019-07-12 14:58:18 +07:00
|
|
|
static void
|
|
|
|
fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
|
2015-06-25 22:35:11 +07:00
|
|
|
{
|
2019-07-12 14:58:18 +07:00
|
|
|
kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
|
2015-06-25 22:35:11 +07:00
|
|
|
}
|
|
|
|
|
2019-07-12 14:58:18 +07:00
|
|
|
/* Fill the page behind @px with the 64-bit value @v (PAGE_SIZE / sizeof(u64) entries). */
#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64))
|
|
|
|
/*
 * Fill the page behind @px with a 32-bit pattern: the low 32 bits of @v are
 * replicated into both halves of every 64-bit word written by fill_px().
 */
#define fill32_px(px, v) do {						\
	const u64 v__ = lower_32_bits(v);				\
	fill_px((px), (v__ << 32) | v__);				\
} while (0)
|
2015-06-25 22:35:10 +07:00
|
|
|
|
2016-08-22 14:44:30 +07:00
|
|
|
static int
|
2017-02-15 15:43:40 +07:00
|
|
|
setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
|
2015-06-30 22:16:39 +07:00
|
|
|
{
|
2018-01-29 17:28:40 +07:00
|
|
|
unsigned long size;
|
2017-08-23 00:38:28 +07:00
|
|
|
|
2017-10-07 05:18:25 +07:00
|
|
|
/*
|
|
|
|
* In order to utilize 64K pages for an object with a size < 2M, we will
|
|
|
|
* need to support a 64K scratch page, given that every 16th entry for a
|
|
|
|
* page-table operating in 64K mode must point to a properly aligned 64K
|
|
|
|
* region, including any PTEs which happen to point to scratch.
|
|
|
|
*
|
|
|
|
* This is only relevant for the 48b PPGTT where we support
|
2018-10-30 01:27:21 +07:00
|
|
|
* huge-gtt-pages, see also i915_vma_insert(). However, as we share the
|
|
|
|
* scratch (read-only) between all vm, we create one 64k scratch page
|
|
|
|
* for all.
|
2017-10-07 05:18:25 +07:00
|
|
|
*/
|
2018-01-29 17:28:40 +07:00
|
|
|
size = I915_GTT_PAGE_SIZE_4K;
|
2019-03-15 05:38:38 +07:00
|
|
|
if (i915_vm_is_4lvl(vm) &&
|
2017-10-07 05:18:25 +07:00
|
|
|
HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
|
2018-01-29 17:28:40 +07:00
|
|
|
size = I915_GTT_PAGE_SIZE_64K;
|
|
|
|
gfp |= __GFP_NOWARN;
|
2017-10-07 05:18:25 +07:00
|
|
|
}
|
2018-01-29 17:28:40 +07:00
|
|
|
gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
|
|
|
|
|
|
|
|
do {
|
2019-07-12 16:43:26 +07:00
|
|
|
unsigned int order = get_order(size);
|
2018-01-29 17:28:40 +07:00
|
|
|
struct page *page;
|
|
|
|
dma_addr_t addr;
|
2017-08-23 00:38:28 +07:00
|
|
|
|
2018-01-29 17:28:40 +07:00
|
|
|
page = alloc_pages(gfp, order);
|
2017-10-07 05:18:25 +07:00
|
|
|
if (unlikely(!page))
|
2018-01-29 17:28:40 +07:00
|
|
|
goto skip;
|
2017-10-07 05:18:25 +07:00
|
|
|
|
2018-07-06 19:26:10 +07:00
|
|
|
addr = dma_map_page_attrs(vm->dma,
|
|
|
|
page, 0, size,
|
|
|
|
PCI_DMA_BIDIRECTIONAL,
|
2018-07-06 19:26:11 +07:00
|
|
|
DMA_ATTR_SKIP_CPU_SYNC |
|
2018-07-06 19:26:10 +07:00
|
|
|
DMA_ATTR_NO_WARN);
|
2018-01-29 17:28:40 +07:00
|
|
|
if (unlikely(dma_mapping_error(vm->dma, addr)))
|
|
|
|
goto free_page;
|
2017-08-23 00:38:28 +07:00
|
|
|
|
2018-01-29 17:28:40 +07:00
|
|
|
if (unlikely(!IS_ALIGNED(addr, size)))
|
|
|
|
goto unmap_page;
|
2017-10-07 05:18:25 +07:00
|
|
|
|
2019-07-12 16:43:26 +07:00
|
|
|
vm->scratch[0].base.page = page;
|
|
|
|
vm->scratch[0].base.daddr = addr;
|
2019-03-05 20:54:27 +07:00
|
|
|
vm->scratch_order = order;
|
2018-01-29 17:28:40 +07:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
unmap_page:
|
|
|
|
dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
|
|
|
|
free_page:
|
|
|
|
__free_pages(page, order);
|
|
|
|
skip:
|
|
|
|
if (size == I915_GTT_PAGE_SIZE_4K)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
size = I915_GTT_PAGE_SIZE_4K;
|
|
|
|
gfp &= ~__GFP_NOWARN;
|
|
|
|
} while (1);
|
2015-06-30 22:16:39 +07:00
|
|
|
}
|
|
|
|
|
2017-02-15 15:43:40 +07:00
|
|
|
static void cleanup_scratch_page(struct i915_address_space *vm)
|
2015-06-30 22:16:39 +07:00
|
|
|
{
|
2019-07-12 16:43:26 +07:00
|
|
|
struct i915_page_dma *p = px_base(&vm->scratch[0]);
|
|
|
|
unsigned int order = vm->scratch_order;
|
2017-08-23 00:38:28 +07:00
|
|
|
|
2019-03-05 20:54:27 +07:00
|
|
|
dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
|
2017-10-07 05:18:25 +07:00
|
|
|
PCI_DMA_BIDIRECTIONAL);
|
2019-03-05 20:54:27 +07:00
|
|
|
__free_pages(p->page, order);
|
2015-06-30 22:16:39 +07:00
|
|
|
}
|
|
|
|
|
2019-07-12 14:58:18 +07:00
|
|
|
static void free_scratch(struct i915_address_space *vm)
|
|
|
|
{
|
2019-07-12 16:43:26 +07:00
|
|
|
int i;
|
|
|
|
|
|
|
|
if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
|
2019-07-12 14:58:18 +07:00
|
|
|
return;
|
|
|
|
|
2019-07-12 16:43:26 +07:00
|
|
|
for (i = 1; i <= vm->top; i++) {
|
|
|
|
if (!px_dma(&vm->scratch[i]))
|
|
|
|
break;
|
|
|
|
cleanup_page_dma(vm, px_base(&vm->scratch[i]));
|
|
|
|
}
|
2019-07-12 14:58:18 +07:00
|
|
|
|
|
|
|
cleanup_scratch_page(vm);
|
|
|
|
}
|
|
|
|
|
2017-02-15 15:43:40 +07:00
|
|
|
static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if we do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-24 23:22:36 +07:00
|
|
|
{
|
2015-04-08 18:13:23 +07:00
|
|
|
struct i915_page_table *pt;
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if we do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-24 23:22:36 +07:00
|
|
|
|
2018-05-22 15:36:43 +07:00
|
|
|
pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
|
2017-02-15 15:43:46 +07:00
|
|
|
if (unlikely(!pt))
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if we do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-24 23:22:36 +07:00
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
2019-07-06 04:52:02 +07:00
|
|
|
if (unlikely(setup_page_dma(vm, &pt->base))) {
|
2017-02-15 15:43:46 +07:00
|
|
|
kfree(pt);
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if we do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-24 23:22:36 +07:00
|
|
|
|
2019-06-14 23:43:42 +07:00
|
|
|
atomic_set(&pt->used, 0);
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if we do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-24 23:22:36 +07:00
|
|
|
return pt;
|
|
|
|
}
|
|
|
|
|
2019-07-12 18:27:22 +07:00
|
|
|
/*
 * Allocate a zero-filled page directory object of @sz bytes and initialise
 * its spinlock. No DMA backing page is set up here.
 *
 * Returns the new directory, or NULL if the allocation fails.
 */
static struct i915_page_directory *__alloc_pd(size_t sz)
{
	struct i915_page_directory *dir = kzalloc(sz, I915_GFP_ALLOW_FAIL);

	if (unlikely(!dir))
		return NULL;

	spin_lock_init(&dir->lock);

	return dir;
}
|
|
|
|
|
|
|
|
static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
|
|
|
|
{
|
|
|
|
struct i915_page_directory *pd;
|
|
|
|
|
2019-07-12 18:27:22 +07:00
|
|
|
pd = __alloc_pd(sizeof(*pd));
|
2017-02-15 15:43:47 +07:00
|
|
|
if (unlikely(!pd))
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-24 23:22:36 +07:00
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
2019-07-12 16:43:22 +07:00
|
|
|
if (unlikely(setup_page_dma(vm, px_base(pd)))) {
|
2017-02-15 15:43:47 +07:00
|
|
|
kfree(pd);
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
2015-04-08 18:13:32 +07:00
|
|
|
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-24 23:22:36 +07:00
|
|
|
return pd;
|
|
|
|
}
|
|
|
|
|
2019-07-12 16:43:22 +07:00
|
|
|
/*
 * Tear down the DMA page behind @pd for @vm, then kfree() @pd itself.
 *
 * NOTE(review): @pd is the struct i915_page_dma handle (see free_px()), and
 * it is what gets passed to kfree(); this presumably relies on that handle
 * sitting at the start of the containing allocation -- confirm against the
 * struct layout.
 */
static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
{
	/* Release the backing page first; the handle is still needed for it. */
	cleanup_page_dma(vm, pd);

	kfree(pd);
}
|
|
|
|
|
2019-07-12 16:43:22 +07:00
|
|
|
/* Free any object accepted by px_base() by handing its base to free_pd(). */
#define free_px(vm, px) free_pd(vm, px_base(px))
|
|
|
|
|
2019-07-06 04:52:01 +07:00
|
|
|
static inline void
|
|
|
|
write_dma_entry(struct i915_page_dma * const pdma,
|
2019-07-12 16:43:27 +07:00
|
|
|
const unsigned short idx,
|
2019-07-06 04:52:01 +07:00
|
|
|
const u64 encoded_entry)
|
|
|
|
{
|
|
|
|
u64 * const vaddr = kmap_atomic(pdma->page);
|
|
|
|
|
2019-07-12 16:43:27 +07:00
|
|
|
vaddr[idx] = encoded_entry;
|
2019-07-06 04:52:01 +07:00
|
|
|
kunmap_atomic(vaddr);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
__set_pd_entry(struct i915_page_directory * const pd,
|
2019-07-12 16:43:27 +07:00
|
|
|
const unsigned short idx,
|
2019-07-06 04:52:01 +07:00
|
|
|
struct i915_page_dma * const to,
|
|
|
|
u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
|
2015-06-30 22:16:37 +07:00
|
|
|
{
|
2019-08-20 21:12:18 +07:00
|
|
|
/* Each thread pre-pins the pd, and we may have a thread per pde. */
|
|
|
|
GEM_BUG_ON(atomic_read(px_used(pd)) > 2 * ARRAY_SIZE(pd->entry));
|
2019-07-06 04:52:01 +07:00
|
|
|
|
2019-07-12 16:43:22 +07:00
|
|
|
atomic_inc(px_used(pd));
|
2019-07-12 16:43:27 +07:00
|
|
|
pd->entry[idx] = to;
|
|
|
|
write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
|
2015-07-29 23:23:46 +07:00
|
|
|
}
|
|
|
|
|
2019-07-12 16:43:27 +07:00
|
|
|
/*
 * Install @to (any object accepted by px_base()) as entry @idx of @pd,
 * encoding its DMA address with gen8_pde_encode.
 */
#define set_pd_entry(pd, idx, to) \
	__set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
|
2019-07-12 16:43:26 +07:00
|
|
|
|
2019-07-06 04:52:01 +07:00
|
|
|
static inline void
|
2019-07-12 16:43:26 +07:00
|
|
|
clear_pd_entry(struct i915_page_directory * const pd,
|
2019-07-12 16:43:27 +07:00
|
|
|
const unsigned short idx,
|
|
|
|
const struct i915_page_scratch * const scratch)
|
2015-07-29 23:23:55 +07:00
|
|
|
{
|
2019-07-12 16:43:22 +07:00
|
|
|
GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
|
2019-06-14 23:43:45 +07:00
|
|
|
|
2019-07-12 16:43:27 +07:00
|
|
|
write_dma_entry(px_base(pd), idx, scratch->encode);
|
|
|
|
pd->entry[idx] = NULL;
|
2019-07-12 16:43:22 +07:00
|
|
|
atomic_dec(px_used(pd));
|
2015-07-29 23:23:46 +07:00
|
|
|
}
|
|
|
|
|
2019-07-06 04:52:04 +07:00
|
|
|
static bool
|
|
|
|
release_pd_entry(struct i915_page_directory * const pd,
|
2019-07-12 16:43:27 +07:00
|
|
|
const unsigned short idx,
|
2019-07-12 16:43:22 +07:00
|
|
|
struct i915_page_table * const pt,
|
2019-07-12 16:43:27 +07:00
|
|
|
const struct i915_page_scratch * const scratch)
|
2019-07-06 04:52:04 +07:00
|
|
|
{
|
|
|
|
bool free = false;
|
|
|
|
|
2019-07-12 18:27:23 +07:00
|
|
|
if (atomic_add_unless(&pt->used, -1, 1))
|
|
|
|
return false;
|
|
|
|
|
2019-07-06 04:52:04 +07:00
|
|
|
spin_lock(&pd->lock);
|
2019-07-12 16:43:22 +07:00
|
|
|
if (atomic_dec_and_test(&pt->used)) {
|
2019-07-12 16:43:27 +07:00
|
|
|
clear_pd_entry(pd, idx, scratch);
|
2019-07-06 04:52:04 +07:00
|
|
|
free = true;
|
|
|
|
}
|
|
|
|
spin_unlock(&pd->lock);
|
|
|
|
|
|
|
|
return free;
|
|
|
|
}
|
2019-07-06 04:52:01 +07:00
|
|
|
|
2019-08-23 13:57:31 +07:00
|
|
|
/*
 * Tell the vGPU host side about creation (@create == true) or destruction
 * of @ppgtt.
 *
 * The top-level directory's use count is bumped on create and dropped on
 * destroy. Then, under vgpu.lock, the DMA address(es) of the top-level page
 * directory (4-level) or of each of the GEN8_3LVL_PDPES page directories
 * (3-level) are written to the vgtif pdp registers, followed by the matching
 * create/destroy message in g2v_notify.
 *
 * NOTE(review): the local must stay named dev_priv -- I915_WRITE() appears
 * to pick it up implicitly; confirm before renaming.
 */
static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
	struct drm_i915_private *dev_priv = ppgtt->vm.i915;
	enum vgt_g2v_type msg;
	int i;

	if (create)
		atomic_inc(px_used(ppgtt->pd)); /* never remove */
	else
		atomic_dec(px_used(ppgtt->pd));

	mutex_lock(&dev_priv->vgpu.lock);

	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		const u64 daddr = px_dma(ppgtt->pd);

		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));

		if (create)
			msg = VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE;
		else
			msg = VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
	} else {
		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
		}

		if (create)
			msg = VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE;
		else
			msg = VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
	}

	/* g2v_notify atomically (via hv trap) consumes the message packet. */
	I915_WRITE(vgtif_reg(g2v_notify), msg);

	mutex_unlock(&dev_priv->vgpu.lock);
}
|
|
|
|
|
2019-07-12 16:43:25 +07:00
|
|
|
/*
 * Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12]
 *
 * GEN8_PDES is the number of u64 entries that fit in one page.
 * gen8_pd_shift(lvl) is the shift for a page *index* at directory level
 * @lvl; __gen8_pte_shift(lvl) is the same shift for a byte *address*, i.e.
 * with GEN8_PTE_SHIFT added. The *_index() helpers feed those shifts to
 * i915_pde_index().
 */
#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
|
|
|
|
|
|
|
|
static inline unsigned int
|
|
|
|
gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
|
|
|
|
{
|
|
|
|
const int shift = gen8_pd_shift(lvl);
|
|
|
|
const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
|
|
|
|
|
|
|
|
GEM_BUG_ON(start >= end);
|
|
|
|
end += ~mask >> gen8_pd_shift(1);
|
|
|
|
|
|
|
|
*idx = i915_pde_index(start, shift);
|
|
|
|
if ((start ^ end) & mask)
|
|
|
|
return GEN8_PDES - *idx;
|
|
|
|
else
|
|
|
|
return i915_pde_index(end, shift) - *idx;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return true when @start has no bits set below gen8_pd_shift(@lvl + 1)
 * and @start and @end differ somewhere at or above that shift, i.e. @start
 * is aligned to a level-@lvl directory and @end lies beyond it.
 */
static inline bool gen8_pd_contains(u64 start, u64 end, int lvl)
{
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);

	/* A start in the middle of the directory can never cover all of it. */
	if (start & ~mask)
		return false;

	return ((start ^ end) & mask) != 0;
}
|
|
|
|
|
|
|
|
/*
 * Number of entries of a single page table covered by the page-index range
 * [@start, @end): the remainder of the table from @start if @end falls in a
 * different table, otherwise simply @end - @start.
 */
static inline unsigned int gen8_pt_count(u64 start, u64 end)
{
	GEM_BUG_ON(start >= end);

	/* start and end differ above the per-table index bits. */
	if ((start ^ end) >> gen8_pd_shift(1))
		return GEN8_PDES - (start & (GEN8_PDES - 1));

	return end - start;
}
|
|
|
|
|
2019-07-19 20:07:37 +07:00
|
|
|
static inline unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
|
|
|
|
{
|
|
|
|
unsigned int shift = __gen8_pte_shift(vm->top);
|
|
|
|
return (vm->total + (1ull << shift) - 1) >> shift;
|
|
|
|
}
|
|
|
|
|
2019-08-16 16:47:54 +07:00
|
|
|
static inline struct i915_page_directory *
|
|
|
|
gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
|
|
|
|
{
|
|
|
|
struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
|
|
|
|
|
|
|
|
if (vm->top == 2)
|
|
|
|
return ppgtt->pd;
|
|
|
|
else
|
|
|
|
return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct i915_page_directory *
|
|
|
|
gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
|
|
|
|
{
|
|
|
|
return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
|
|
|
|
}
|
|
|
|
|
2019-07-12 18:27:22 +07:00
|
|
|
static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
|
|
|
|
struct i915_page_directory *pd,
|
|
|
|
int count, int lvl)
|
2019-07-12 16:43:23 +07:00
|
|
|
{
|
2019-07-12 18:27:22 +07:00
|
|
|
if (lvl) {
|
|
|
|
void **pde = pd->entry;
|
2019-07-12 16:43:23 +07:00
|
|
|
|
2019-07-12 18:27:22 +07:00
|
|
|
do {
|
|
|
|
if (!*pde)
|
|
|
|
continue;
|
2019-07-12 16:43:23 +07:00
|
|
|
|
2019-07-12 18:27:22 +07:00
|
|
|
__gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
|
|
|
|
} while (pde++, --count);
|
2019-07-12 16:43:23 +07:00
|
|
|
}
|
|
|
|
|
2019-07-12 18:27:22 +07:00
|
|
|
free_px(vm, pd);
|
2019-07-12 16:43:23 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
|
|
|
|
{
|
|
|
|
struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
|
|
|
|
|
2019-07-12 18:27:22 +07:00
|
|
|
if (intel_vgpu_active(vm->i915))
|
2019-07-12 16:43:23 +07:00
|
|
|
gen8_ppgtt_notify_vgt(ppgtt, false);
|
|
|
|
|
2019-07-19 20:07:37 +07:00
|
|
|
__gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
|
2019-07-12 16:43:23 +07:00
|
|
|
free_scratch(vm);
|
|
|
|
}
|
|
|
|
|
2019-07-12 18:27:23 +07:00
|
|
|
/*
 * Clear the range [start, end) below the directory @pd, which sits at level
 * @lvl of the tree. @start and @end are page indices (the caller has already
 * shifted the addresses right by GEN8_PTE_SHIFT).
 *
 * For every entry of @pd covered by the range, one of three things happens:
 *  - the entry's previous use count has bits set at or above
 *    gen8_pd_shift(1) and gen8_pd_contains() reports the range covers it:
 *    the entry is replaced by scratch and the child is handed to
 *    __gen8_ppgtt_cleanup();
 *  - otherwise, above the last level, recurse into the child;
 *  - otherwise, at the last level, overwrite the covered PTEs with the
 *    level-0 scratch encoding and drop their count from pt->used.
 * In the latter two cases the reference taken by atomic_fetch_inc() is
 * dropped via release_pd_entry(), freeing the child if that reports true.
 *
 * Returns the advanced @start, i.e. the first index not yet handled.
 */
static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
			      struct i915_page_directory * const pd,
			      u64 start, const u64 end, int lvl)
{
	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
	unsigned int idx, len;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	/* From here on, lvl names the level of the children of @pd. */
	len = gen8_pd_range(start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));

	do {
		struct i915_page_table *pt = pd->entry[idx];
		/* Always take a temporary reference; remember the old count. */
		const int prev_used = atomic_fetch_inc(&pt->used);

		if (prev_used >> gen8_pd_shift(1) &&
		    gen8_pd_contains(start, end, lvl)) {
			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
			    __func__, vm, lvl + 1, idx, start, end);
			clear_pd_entry(pd, idx, scratch);
			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
			start += (u64)I915_PDES << gen8_pd_shift(lvl);
			continue;
		}

		if (!lvl) {
			unsigned int count = gen8_pt_count(start, end);
			u64 *vaddr;

			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
			    __func__, vm, lvl, start, end,
			    gen8_pd_index(start, 0), count,
			    atomic_read(&pt->used));
			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));

			/* Point the covered PTEs back at the scratch page. */
			vaddr = kmap_atomic_px(pt);
			memset64(vaddr + gen8_pd_index(start, 0),
				 vm->scratch[0].encode,
				 count);
			kunmap_atomic(vaddr);

			atomic_sub(count, &pt->used);
			start += count;
		} else {
			start = __gen8_ppgtt_clear(vm, as_pd(pt),
						   start, end, lvl);
		}

		if (release_pd_entry(pd, idx, pt, scratch))
			free_px(vm, pt);
	} while (idx++, --len);

	return start;
}
|
2016-10-13 19:02:42 +07:00
|
|
|
|
2019-07-12 18:27:23 +07:00
|
|
|
/*
 * clear_range hook: clear @length bytes of address space starting at @start.
 *
 * Both values must be aligned to BIT_ULL(GEN8_PTE_SHIFT), the range must fit
 * inside vm->total and must not be empty (all checked with GEM_BUG_ON). The
 * byte range is converted to page indices and passed to __gen8_ppgtt_clear()
 * starting from the top-level directory.
 */
static void gen8_ppgtt_clear(struct i915_address_space *vm,
			     u64 start, u64 length)
{
	struct i915_page_directory *top;
	u64 end;

	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(range_overflows(start, length, vm->total));

	/* Work in units of pages from here on. */
	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;
	GEM_BUG_ON(length == 0);

	top = i915_vm_to_ppgtt(vm)->pd;
	end = start + length;

	__gen8_ppgtt_clear(vm, top, start, end, vm->top);
}
|
2013-11-03 11:07:23 +07:00
|
|
|
|
2019-07-12 18:27:24 +07:00
|
|
|
/*
 * Populate the page-table tree below @pd (at level @lvl) for the page index
 * range [*start, end).
 *
 * pd->lock is held while entries of @pd are inspected or installed, and is
 * dropped around allocation and around the recursion into a child. Because
 * the lock is dropped while allocating, the slot is re-checked afterwards;
 * if it has been filled in the meantime, the freshly allocated table is kept
 * in @spare for a later slot (or freed on exit) and the installed entry is
 * used instead.
 *
 * At the last level the number of covered PTEs is added to pt->used and
 * *start is advanced by that count. Above it, the child is kept referenced
 * (pt->used incremented) for the duration of the recursive call.
 *
 * Returns 0 on success or the error from alloc_pd()/alloc_pt(). On failure
 * *start reflects how far the walk got.
 */
static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
			      struct i915_page_directory * const pd,
			      u64 * const start, const u64 end, int lvl)
{
	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
	struct i915_page_table *spare = NULL;
	unsigned int idx, len;
	int ret = 0;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	/* From here on, lvl names the level of the children of @pd. */
	len = gen8_pd_range(*start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, *start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));

	spin_lock(&pd->lock);
	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
	do {
		struct i915_page_table *pt = pd->entry[idx];

		if (!pt) {
			/* Allocation happens without pd->lock held. */
			spin_unlock(&pd->lock);

			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
			    __func__, vm, lvl + 1, idx);

			/* Reuse a table left over from an earlier lost race. */
			pt = fetch_and_zero(&spare);
			if (!pt) {
				pt = lvl ? &alloc_pd(vm)->pt : alloc_pt(vm);
				if (IS_ERR(pt)) {
					ret = PTR_ERR(pt);
					goto out;
				}
			}

			/*
			 * Directories are always filled with scratch; a leaf
			 * table only when a vGPU is active or the range does
			 * not cover the whole table.
			 */
			if (lvl ||
			    intel_vgpu_active(vm->i915) ||
			    gen8_pt_count(*start, end) < I915_PDES)
				fill_px(pt, vm->scratch[lvl].encode);

			spin_lock(&pd->lock);
			if (unlikely(pd->entry[idx])) {
				/* Slot was filled meanwhile; keep ours spare. */
				spare = pt;
				pt = pd->entry[idx];
			} else {
				set_pd_entry(pd, idx, pt);
			}
		}

		if (!lvl) {
			unsigned int count = gen8_pt_count(*start, end);

			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
			    __func__, vm, lvl, *start, end,
			    gen8_pd_index(*start, 0), count,
			    atomic_read(&pt->used));

			atomic_add(count, &pt->used);
			/* All other pdes may be simultaneously removed */
			GEM_BUG_ON(atomic_read(&pt->used) > 2 * I915_PDES);
			*start += count;
		} else {
			/* Hold a reference on the child across the unlock. */
			atomic_inc(&pt->used);
			spin_unlock(&pd->lock);

			ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
						 start, end, lvl);
			if (unlikely(ret)) {
				if (release_pd_entry(pd, idx, pt, scratch))
					free_px(vm, pt);
				goto out;
			}

			spin_lock(&pd->lock);
			atomic_dec(&pt->used);
			GEM_BUG_ON(!atomic_read(&pt->used));
		}
	} while (idx++, --len);
	spin_unlock(&pd->lock);

out:
	if (spare)
		free_px(vm, spare);
	return ret;
}
|
|
|
|
|
2019-07-12 18:27:24 +07:00
|
|
|
/*
 * allocate_va_range hook: make sure page tables exist for @length bytes of
 * address space starting at @start.
 *
 * The arguments carry the same alignment, bounds and non-empty requirements
 * as gen8_ppgtt_clear() (checked with GEM_BUG_ON). If __gen8_ppgtt_alloc()
 * fails after having advanced, the part it already covered is handed back to
 * __gen8_ppgtt_clear() before the error is returned.
 */
static int gen8_ppgtt_alloc(struct i915_address_space *vm,
			    u64 start, u64 length)
{
	struct i915_page_directory *top;
	u64 from;
	int err;

	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(range_overflows(start, length, vm->total));

	/* Work in units of pages from here on. */
	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;
	GEM_BUG_ON(length == 0);
	from = start;

	top = i915_vm_to_ppgtt(vm)->pd;
	err = __gen8_ppgtt_alloc(vm, top, &start, start + length, vm->top);
	if (likely(!err))
		return 0;

	/* start now marks how far the allocation got; undo [from, start). */
	if (from != start)
		__gen8_ppgtt_clear(vm, top, from, start, vm->top);

	return err;
}
|
|
|
|
|
|
|
|
static inline struct sgt_dma {
|
|
|
|
struct scatterlist *sg;
|
|
|
|
dma_addr_t dma, max;
|
|
|
|
} sgt_dma(struct i915_vma *vma) {
|
|
|
|
struct scatterlist *sg = vma->pages->sgl;
|
|
|
|
dma_addr_t addr = sg_dma_address(sg);
|
|
|
|
return (struct sgt_dma) { sg, addr, addr + sg->length };
|
|
|
|
}
|
|
|
|
|
2019-07-12 18:27:25 +07:00
|
|
|
static __always_inline u64
|
2019-08-16 16:47:54 +07:00
|
|
|
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
|
|
|
|
struct i915_page_directory *pdp,
|
|
|
|
struct sgt_dma *iter,
|
|
|
|
u64 idx,
|
|
|
|
enum i915_cache_level cache_level,
|
|
|
|
u32 flags)
|
2019-07-12 16:43:23 +07:00
|
|
|
{
|
|
|
|
struct i915_page_directory *pd;
|
|
|
|
const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
|
|
|
|
gen8_pte_t *vaddr;
|
|
|
|
|
2019-07-12 18:27:25 +07:00
|
|
|
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
|
|
|
|
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
|
2019-07-12 16:43:23 +07:00
|
|
|
do {
|
2019-07-12 18:27:25 +07:00
|
|
|
vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
|
2019-07-12 16:43:23 +07:00
|
|
|
|
|
|
|
iter->dma += I915_GTT_PAGE_SIZE;
|
|
|
|
if (iter->dma >= iter->max) {
|
|
|
|
iter->sg = __sg_next(iter->sg);
|
|
|
|
if (!iter->sg) {
|
2019-07-12 18:27:25 +07:00
|
|
|
idx = 0;
|
2019-07-12 16:43:23 +07:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
iter->dma = sg_dma_address(iter->sg);
|
|
|
|
iter->max = iter->dma + iter->sg->length;
|
|
|
|
}
|
|
|
|
|
2019-07-12 18:27:25 +07:00
|
|
|
if (gen8_pd_index(++idx, 0) == 0) {
|
|
|
|
if (gen8_pd_index(idx, 1) == 0) {
|
2019-07-12 16:43:23 +07:00
|
|
|
/* Limited by sg length for 3lvl */
|
2019-07-12 18:27:25 +07:00
|
|
|
if (gen8_pd_index(idx, 2) == 0)
|
2019-07-12 16:43:23 +07:00
|
|
|
break;
|
|
|
|
|
2019-07-12 18:27:25 +07:00
|
|
|
pd = pdp->entry[gen8_pd_index(idx, 2)];
|
2019-07-12 16:43:23 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
kunmap_atomic(vaddr);
|
2019-07-12 18:27:25 +07:00
|
|
|
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
|
2019-07-12 16:43:23 +07:00
|
|
|
}
|
|
|
|
} while (1);
|
|
|
|
kunmap_atomic(vaddr);
|
|
|
|
|
2019-07-12 18:27:25 +07:00
|
|
|
return idx;
|
2019-07-12 16:43:23 +07:00
|
|
|
}
|
|
|
|
|
2019-08-16 16:47:54 +07:00
|
|
|
static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
|
|
|
|
struct sgt_dma *iter,
|
2019-07-12 16:43:23 +07:00
|
|
|
enum i915_cache_level cache_level,
|
|
|
|
u32 flags)
|
2017-10-07 05:18:24 +07:00
|
|
|
{
|
2018-07-13 01:53:11 +07:00
|
|
|
const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
|
2017-10-07 05:18:24 +07:00
|
|
|
u64 start = vma->node.start;
|
|
|
|
dma_addr_t rem = iter->sg->length;
|
|
|
|
|
2019-08-16 16:47:54 +07:00
|
|
|
GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
|
|
|
|
|
2017-10-07 05:18:24 +07:00
|
|
|
do {
|
2019-08-16 16:47:54 +07:00
|
|
|
struct i915_page_directory * const pdp =
|
|
|
|
gen8_pdp_for_page_address(vma->vm, start);
|
|
|
|
struct i915_page_directory * const pd =
|
2019-07-12 18:27:25 +07:00
|
|
|
i915_pd_entry(pdp, __gen8_pte_index(start, 2));
|
2017-10-07 05:18:24 +07:00
|
|
|
gen8_pte_t encode = pte_encode;
|
2019-07-12 18:27:25 +07:00
|
|
|
unsigned int maybe_64K = -1;
|
|
|
|
unsigned int page_size;
|
2017-10-07 05:18:24 +07:00
|
|
|
gen8_pte_t *vaddr;
|
2019-07-12 18:27:25 +07:00
|
|
|
u16 index;
|
2017-10-07 05:18:24 +07:00
|
|
|
|
|
|
|
if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
|
|
|
|
IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
|
2019-07-12 18:27:25 +07:00
|
|
|
rem >= I915_GTT_PAGE_SIZE_2M &&
|
|
|
|
!__gen8_pte_index(start, 0)) {
|
|
|
|
index = __gen8_pte_index(start, 1);
|
2017-10-07 05:18:24 +07:00
|
|
|
encode |= GEN8_PDE_PS_2M;
|
2019-07-12 18:27:25 +07:00
|
|
|
page_size = I915_GTT_PAGE_SIZE_2M;
|
2017-10-07 05:18:24 +07:00
|
|
|
|
|
|
|
vaddr = kmap_atomic_px(pd);
|
|
|
|
} else {
|
2019-07-12 18:27:25 +07:00
|
|
|
struct i915_page_table *pt =
|
|
|
|
i915_pt_entry(pd, __gen8_pte_index(start, 1));
|
2017-10-07 05:18:24 +07:00
|
|
|
|
2019-07-12 18:27:25 +07:00
|
|
|
index = __gen8_pte_index(start, 0);
|
2017-10-07 05:18:24 +07:00
|
|
|
page_size = I915_GTT_PAGE_SIZE;
|
|
|
|
|
2017-10-07 05:18:26 +07:00
|
|
|
if (!index &&
|
|
|
|
vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
|
|
|
|
IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
|
|
|
|
(IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
|
2019-07-12 18:27:25 +07:00
|
|
|
rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
|
|
|
|
maybe_64K = __gen8_pte_index(start, 1);
|
2017-10-07 05:18:26 +07:00
|
|
|
|
2017-10-07 05:18:24 +07:00
|
|
|
vaddr = kmap_atomic_px(pt);
|
|
|
|
}
|
|
|
|
|
|
|
|
do {
|
|
|
|
GEM_BUG_ON(iter->sg->length < page_size);
|
|
|
|
vaddr[index++] = encode | iter->dma;
|
|
|
|
|
|
|
|
start += page_size;
|
|
|
|
iter->dma += page_size;
|
|
|
|
rem -= page_size;
|
|
|
|
if (iter->dma >= iter->max) {
|
|
|
|
iter->sg = __sg_next(iter->sg);
|
|
|
|
if (!iter->sg)
|
|
|
|
break;
|
|
|
|
|
|
|
|
rem = iter->sg->length;
|
|
|
|
iter->dma = sg_dma_address(iter->sg);
|
|
|
|
iter->max = iter->dma + rem;
|
|
|
|
|
2019-07-12 18:27:25 +07:00
|
|
|
if (maybe_64K != -1 && index < I915_PDES &&
|
2017-10-07 05:18:26 +07:00
|
|
|
!(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
|
|
|
|
(IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
|
2019-07-12 18:27:25 +07:00
|
|
|
rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
|
|
|
|
maybe_64K = -1;
|
2017-10-07 05:18:26 +07:00
|
|
|
|
2017-10-07 05:18:24 +07:00
|
|
|
if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
|
|
|
|
break;
|
|
|
|
}
|
2019-07-12 18:27:25 +07:00
|
|
|
} while (rem >= page_size && index < I915_PDES);
|
2017-10-07 05:18:24 +07:00
|
|
|
|
|
|
|
kunmap_atomic(vaddr);
|
2017-10-07 05:18:26 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Is it safe to mark the 2M block as 64K? -- Either we have
|
|
|
|
* filled whole page-table with 64K entries, or filled part of
|
|
|
|
* it and have reached the end of the sg table and we have
|
|
|
|
* enough padding.
|
|
|
|
*/
|
2019-07-12 18:27:25 +07:00
|
|
|
if (maybe_64K != -1 &&
|
|
|
|
(index == I915_PDES ||
|
2017-10-07 05:18:26 +07:00
|
|
|
(i915_vm_has_scratch_64K(vma->vm) &&
|
|
|
|
!iter->sg && IS_ALIGNED(vma->node.start +
|
|
|
|
vma->node.size,
|
|
|
|
I915_GTT_PAGE_SIZE_2M)))) {
|
|
|
|
vaddr = kmap_atomic_px(pd);
|
2019-07-12 18:27:25 +07:00
|
|
|
vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
|
2017-10-07 05:18:26 +07:00
|
|
|
kunmap_atomic(vaddr);
|
2017-10-07 05:18:27 +07:00
|
|
|
page_size = I915_GTT_PAGE_SIZE_64K;
|
2018-05-11 16:51:40 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We write all 4K page entries, even when using 64K
|
|
|
|
* pages. In order to verify that the HW isn't cheating
|
|
|
|
* by using the 4K PTE instead of the 64K PTE, we want
|
|
|
|
* to remove all the surplus entries. If the HW skipped
|
|
|
|
* the 64K PTE, it will read/write into the scratch page
|
|
|
|
* instead - which we detect as missing results during
|
|
|
|
* selftests.
|
|
|
|
*/
|
|
|
|
if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
|
|
|
|
u16 i;
|
|
|
|
|
2019-07-12 16:43:26 +07:00
|
|
|
encode = vma->vm->scratch[0].encode;
|
2019-07-12 18:27:25 +07:00
|
|
|
vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
|
2018-05-11 16:51:40 +07:00
|
|
|
|
|
|
|
for (i = 1; i < index; i += 16)
|
|
|
|
memset64(vaddr + i, encode, 15);
|
|
|
|
|
|
|
|
kunmap_atomic(vaddr);
|
|
|
|
}
|
2017-10-07 05:18:26 +07:00
|
|
|
}
|
2017-10-07 05:18:27 +07:00
|
|
|
|
|
|
|
vma->page_sizes.gtt |= page_size;
|
2017-10-07 05:18:24 +07:00
|
|
|
} while (iter->sg);
|
|
|
|
}
|
|
|
|
|
2019-08-16 16:47:54 +07:00
|
|
|
static void gen8_ppgtt_insert(struct i915_address_space *vm,
|
|
|
|
struct i915_vma *vma,
|
|
|
|
enum i915_cache_level cache_level,
|
|
|
|
u32 flags)
|
2017-02-15 15:43:37 +07:00
|
|
|
{
|
2019-08-16 16:47:54 +07:00
|
|
|
struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
|
2017-11-07 04:11:28 +07:00
|
|
|
struct sgt_dma iter = sgt_dma(vma);
|
2015-08-03 15:53:27 +07:00
|
|
|
|
2017-10-07 05:18:24 +07:00
|
|
|
if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
|
2019-08-16 16:47:54 +07:00
|
|
|
gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags);
|
|
|
|
} else {
|
2019-07-12 18:27:25 +07:00
|
|
|
u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
|
2017-10-07 05:18:24 +07:00
|
|
|
|
2019-08-16 16:47:54 +07:00
|
|
|
do {
|
|
|
|
struct i915_page_directory * const pdp =
|
|
|
|
gen8_pdp_for_page_index(vm, idx);
|
|
|
|
|
|
|
|
idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
|
|
|
|
cache_level, flags);
|
|
|
|
} while (idx);
|
2017-10-07 05:18:27 +07:00
|
|
|
|
|
|
|
vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
|
2017-10-07 05:18:24 +07:00
|
|
|
}
|
2015-07-30 17:02:49 +07:00
|
|
|
}
|
|
|
|
|
2015-06-30 22:16:40 +07:00
|
|
|
static int gen8_init_scratch(struct i915_address_space *vm)
|
|
|
|
{
|
2016-04-27 19:19:25 +07:00
|
|
|
int ret;
|
2019-07-12 16:43:26 +07:00
|
|
|
int i;
|
2015-06-30 22:16:40 +07:00
|
|
|
|
2018-10-30 01:27:21 +07:00
|
|
|
/*
|
|
|
|
* If everybody agrees to not to write into the scratch page,
|
|
|
|
* we can reuse it for all vm, keeping contexts and processes separate.
|
|
|
|
*/
|
|
|
|
if (vm->has_read_only &&
|
|
|
|
vm->i915->kernel_context &&
|
2019-06-11 16:12:37 +07:00
|
|
|
vm->i915->kernel_context->vm) {
|
2019-10-04 20:40:09 +07:00
|
|
|
struct i915_address_space *clone =
|
|
|
|
rcu_dereference_protected(vm->i915->kernel_context->vm,
|
|
|
|
true); /* static */
|
2018-10-30 01:27:21 +07:00
|
|
|
|
|
|
|
GEM_BUG_ON(!clone->has_read_only);
|
|
|
|
|
2019-03-05 20:54:27 +07:00
|
|
|
vm->scratch_order = clone->scratch_order;
|
2019-07-12 16:43:26 +07:00
|
|
|
memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
|
|
|
|
px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
|
2018-10-30 01:27:21 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-05-22 15:36:43 +07:00
|
|
|
ret = setup_scratch_page(vm, __GFP_HIGHMEM);
|
2016-08-22 14:44:30 +07:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2015-06-30 22:16:40 +07:00
|
|
|
|
2019-07-12 16:43:26 +07:00
|
|
|
vm->scratch[0].encode =
|
|
|
|
gen8_pte_encode(px_dma(&vm->scratch[0]),
|
|
|
|
I915_CACHE_LLC, vm->has_read_only);
|
2018-10-30 01:27:20 +07:00
|
|
|
|
2019-07-12 16:43:26 +07:00
|
|
|
for (i = 1; i <= vm->top; i++) {
|
|
|
|
if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
|
|
|
|
goto free_scratch;
|
2015-06-30 22:16:40 +07:00
|
|
|
|
2019-07-12 16:43:26 +07:00
|
|
|
fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
|
|
|
|
vm->scratch[i].encode =
|
|
|
|
gen8_pde_encode(px_dma(&vm->scratch[i]),
|
|
|
|
I915_CACHE_LLC);
|
2015-07-29 23:23:55 +07:00
|
|
|
}
|
|
|
|
|
2015-06-30 22:16:40 +07:00
|
|
|
return 0;
|
2016-04-27 19:19:25 +07:00
|
|
|
|
2019-07-12 16:43:26 +07:00
|
|
|
free_scratch:
|
|
|
|
free_scratch(vm);
|
|
|
|
return -ENOMEM;
|
2015-06-30 22:16:40 +07:00
|
|
|
}
|
|
|
|
|
2019-06-11 16:12:38 +07:00
|
|
|
static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
|
2015-08-28 14:41:14 +07:00
|
|
|
{
|
2018-06-05 22:37:58 +07:00
|
|
|
struct i915_address_space *vm = &ppgtt->vm;
|
2019-07-12 18:27:24 +07:00
|
|
|
struct i915_page_directory *pd = ppgtt->pd;
|
|
|
|
unsigned int idx;
|
|
|
|
|
|
|
|
GEM_BUG_ON(vm->top != 2);
|
2019-07-19 20:07:37 +07:00
|
|
|
GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);
|
2019-07-12 18:27:24 +07:00
|
|
|
|
|
|
|
for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
|
|
|
|
struct i915_page_directory *pde;
|
2015-08-28 14:41:14 +07:00
|
|
|
|
2019-07-12 18:27:24 +07:00
|
|
|
pde = alloc_pd(vm);
|
|
|
|
if (IS_ERR(pde))
|
|
|
|
return PTR_ERR(pde);
|
2015-08-28 14:41:14 +07:00
|
|
|
|
2019-07-12 18:27:24 +07:00
|
|
|
fill_px(pde, vm->scratch[1].encode);
|
|
|
|
set_pd_entry(pd, idx, pde);
|
|
|
|
atomic_inc(px_used(pde)); /* keep pinned */
|
2017-02-15 15:43:48 +07:00
|
|
|
}
|
2019-08-23 21:14:21 +07:00
|
|
|
wmb();
|
2015-08-28 14:41:14 +07:00
|
|
|
|
2017-02-15 15:43:48 +07:00
|
|
|
return 0;
|
2015-08-28 14:41:14 +07:00
|
|
|
}
|
|
|
|
|
2019-06-21 14:07:59 +07:00
|
|
|
static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
|
2019-03-15 05:38:39 +07:00
|
|
|
{
|
2019-06-21 14:07:59 +07:00
|
|
|
struct drm_i915_private *i915 = gt->i915;
|
|
|
|
|
|
|
|
ppgtt->vm.gt = gt;
|
2019-03-15 05:38:39 +07:00
|
|
|
ppgtt->vm.i915 = i915;
|
|
|
|
ppgtt->vm.dma = &i915->drm.pdev->dev;
|
|
|
|
ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
|
|
|
|
|
|
|
|
i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
|
|
|
|
|
|
|
|
ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma;
|
|
|
|
ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
|
|
|
|
ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages;
|
|
|
|
ppgtt->vm.vma_ops.clear_pages = clear_pages;
|
|
|
|
}
|
|
|
|
|
2019-07-06 04:52:03 +07:00
|
|
|
static struct i915_page_directory *
|
|
|
|
gen8_alloc_top_pd(struct i915_address_space *vm)
|
|
|
|
{
|
2019-07-19 20:07:37 +07:00
|
|
|
const unsigned int count = gen8_pd_top_count(vm);
|
2019-07-06 04:52:03 +07:00
|
|
|
struct i915_page_directory *pd;
|
|
|
|
|
2019-07-12 16:43:27 +07:00
|
|
|
GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
|
2019-07-06 04:52:03 +07:00
|
|
|
|
2019-07-12 18:27:22 +07:00
|
|
|
pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
|
|
|
|
if (unlikely(!pd))
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
if (unlikely(setup_page_dma(vm, px_base(pd)))) {
|
|
|
|
kfree(pd);
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
2019-07-06 04:52:03 +07:00
|
|
|
|
2019-07-12 16:43:27 +07:00
|
|
|
fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
|
2019-07-12 18:27:23 +07:00
|
|
|
atomic_inc(px_used(pd)); /* mark as pinned */
|
2019-07-06 04:52:03 +07:00
|
|
|
return pd;
|
|
|
|
}
|
|
|
|
|
2015-03-18 20:47:59 +07:00
|
|
|
/*
|
2014-02-20 13:05:42 +07:00
|
|
|
* GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
|
|
|
|
* with a net effect resembling a 2-level page table in normal x86 terms. Each
|
|
|
|
* PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
|
|
|
|
* space.
|
2013-11-05 11:47:32 +07:00
|
|
|
*
|
2014-02-20 13:05:42 +07:00
|
|
|
*/
|
2019-06-11 16:12:38 +07:00
|
|
|
static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
|
2013-11-05 11:47:32 +07:00
|
|
|
{
|
2019-06-11 16:12:38 +07:00
|
|
|
struct i915_ppgtt *ppgtt;
|
2018-06-07 23:30:40 +07:00
|
|
|
int err;
|
|
|
|
|
|
|
|
ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
|
|
|
|
if (!ppgtt)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
2019-06-21 14:07:59 +07:00
|
|
|
ppgtt_init(ppgtt, &i915->gt);
|
2019-07-12 16:43:24 +07:00
|
|
|
ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
|
2017-02-28 22:28:09 +07:00
|
|
|
|
2019-04-11 15:30:34 +07:00
|
|
|
/*
|
|
|
|
* From bdw, there is hw support for read-only pages in the PPGTT.
|
|
|
|
*
|
|
|
|
* Gen11 has HSDES#:1807136187 unresolved. Disable ro support
|
|
|
|
* for now.
|
2019-09-11 19:57:17 +07:00
|
|
|
*
|
|
|
|
* Gen12 has inherited the same read-only fault issue from gen11.
|
2019-04-11 15:30:34 +07:00
|
|
|
*/
|
2019-09-11 19:57:17 +07:00
|
|
|
ppgtt->vm.has_read_only = !IS_GEN_RANGE(i915, 11, 12);
|
2018-07-13 01:53:11 +07:00
|
|
|
|
2017-02-15 15:43:40 +07:00
|
|
|
/* There are only few exceptions for gen >=6. chv and bxt.
|
|
|
|
* And we are not sure about the latter so play safe for now.
|
|
|
|
*/
|
2018-06-07 23:30:40 +07:00
|
|
|
if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915))
|
2018-06-05 22:37:58 +07:00
|
|
|
ppgtt->vm.pt_kmap_wc = true;
|
2017-02-15 15:43:40 +07:00
|
|
|
|
2018-06-07 23:30:40 +07:00
|
|
|
err = gen8_init_scratch(&ppgtt->vm);
|
|
|
|
if (err)
|
|
|
|
goto err_free;
|
2017-08-23 00:38:28 +07:00
|
|
|
|
2019-07-06 04:52:03 +07:00
|
|
|
ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm);
|
|
|
|
if (IS_ERR(ppgtt->pd)) {
|
|
|
|
err = PTR_ERR(ppgtt->pd);
|
2019-06-14 23:43:45 +07:00
|
|
|
goto err_free_scratch;
|
2019-06-14 23:43:42 +07:00
|
|
|
}
|
2015-07-29 23:23:46 +07:00
|
|
|
|
2019-08-16 16:47:54 +07:00
|
|
|
if (!i915_vm_is_4lvl(&ppgtt->vm)) {
|
2019-08-23 21:14:21 +07:00
|
|
|
err = gen8_preallocate_top_level_pdp(ppgtt);
|
|
|
|
if (err)
|
|
|
|
goto err_free_pd;
|
2015-08-03 15:52:01 +07:00
|
|
|
}
|
2015-07-29 23:23:46 +07:00
|
|
|
|
2019-10-04 20:39:57 +07:00
|
|
|
ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
|
2019-08-16 16:47:54 +07:00
|
|
|
ppgtt->vm.insert_entries = gen8_ppgtt_insert;
|
2019-07-12 18:27:24 +07:00
|
|
|
ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
|
2019-07-12 18:27:23 +07:00
|
|
|
ppgtt->vm.clear_range = gen8_ppgtt_clear;
|
|
|
|
|
2018-06-07 23:30:40 +07:00
|
|
|
if (intel_vgpu_active(i915))
|
2015-08-28 14:41:18 +07:00
|
|
|
gen8_ppgtt_notify_vgt(ppgtt, true);
|
|
|
|
|
2018-06-05 22:37:58 +07:00
|
|
|
ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
|
2017-02-28 22:28:11 +07:00
|
|
|
|
2018-06-07 23:30:40 +07:00
|
|
|
return ppgtt;
|
2015-07-29 23:23:46 +07:00
|
|
|
|
2019-07-06 04:52:03 +07:00
|
|
|
err_free_pd:
|
2019-07-12 18:27:22 +07:00
|
|
|
__gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
|
2019-07-19 20:07:37 +07:00
|
|
|
gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top);
|
2019-06-14 23:43:45 +07:00
|
|
|
err_free_scratch:
|
2019-07-12 14:58:18 +07:00
|
|
|
free_scratch(&ppgtt->vm);
|
2018-06-07 23:30:40 +07:00
|
|
|
err_free:
|
|
|
|
kfree(ppgtt);
|
|
|
|
return ERR_PTR(err);
|
drm/i915/gen8: Dynamic page table allocations
This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.
In LRC mode, contexts need to know the PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet. Check if
PDPs have been allocated and use the scratch page if they do not exist yet.
Before submission, update the PDPs in the logic ring context as PDPs
have been allocated.
v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.
v3: Rebase.
v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).
v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.
v6: Update err_out case in gen8_alloc_va_range (missed from lastest
rebase).
v7: Rebase after s/page_tables/page_table/.
v8: Updated scratch_pt check after scratch flag was removed in previous
patch.
v9: Note that lrc mode needs to be updated to support init state without
any PDP.
v10: Unmap correct page_table in gen8_alloc_va_range's error case, clean-up
gen8_aliasing_ppgtt_init (remove duplicated map), and initialize PTs
during page table allocation.
v11: Squashed LRC enabling commit, otherwise LRC mode would be left broken
until it was updated to handle the init case without any PDP.
v12: Do not overallocate new_pts bitmap, make alloc_gen8_temp_bitmaps
static and don't abuse of inline functions. (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-04-08 18:13:34 +07:00
|
|
|
}
|
|
|
|
|
2015-03-16 23:00:56 +07:00
|
|
|
/* Write pde (index) from the page directory @pd to the page table @pt */
|
2019-06-11 16:12:38 +07:00
|
|
|
static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
|
2017-02-15 15:43:45 +07:00
|
|
|
const unsigned int pde,
|
|
|
|
const struct i915_page_table *pt)
|
2013-04-09 08:43:54 +07:00
|
|
|
{
|
2015-03-16 23:00:56 +07:00
|
|
|
/* Caller needs to make sure the write completes if necessary */
|
2018-06-12 00:18:24 +07:00
|
|
|
iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
|
|
|
|
ppgtt->pd_addr + pde);
|
2015-03-16 23:00:56 +07:00
|
|
|
}
|
2013-04-09 08:43:54 +07:00
|
|
|
|
2019-06-21 14:07:51 +07:00
|
|
|
static void gen7_ppgtt_enable(struct intel_gt *gt)
|
2013-04-24 13:15:32 +07:00
|
|
|
{
|
2019-06-21 14:07:51 +07:00
|
|
|
struct drm_i915_private *i915 = gt->i915;
|
|
|
|
struct intel_uncore *uncore = gt->uncore;
|
2016-03-16 18:00:36 +07:00
|
|
|
struct intel_engine_cs *engine;
|
drm/i915: Allocate intel_engine_cs structure only for the enabled engines
With the possibility of addition of many more number of rings in future,
the drm_i915_private structure could bloat as an array, of type
intel_engine_cs, is embedded inside it.
struct intel_engine_cs engine[I915_NUM_ENGINES];
Though this is still fine as generally there is only a single instance of
drm_i915_private structure used, but not all of the possible rings would be
enabled or active on most of the platforms. Some memory can be saved by
allocating intel_engine_cs structure only for the enabled/active engines.
Currently the engine/ring ID is kept static and dev_priv->engine[] is simply
indexed using the enums defined in intel_engine_id.
To save memory and continue using the static engine/ring IDs, 'engine' is
defined as an array of pointers.
struct intel_engine_cs *engine[I915_NUM_ENGINES];
dev_priv->engine[engine_ID] will be NULL for disabled engine instances.
There is a text size reduction of 928 bytes, from 1028200 to 1027272, for
i915.o file (but for i915.ko file text size remain same as 1193131 bytes).
v2:
- Remove the engine iterator field added in drm_i915_private structure,
instead pass a local iterator variable to the for_each_engine**
macros. (Chris)
- Do away with intel_engine_initialized() and instead directly use the
NULL pointer check on engine pointer. (Chris)
v3:
- Remove for_each_engine_id() macro, as the updated macro for_each_engine()
can be used in place of it. (Chris)
- Protect the access to Render engine Fault register with a NULL check, as
engine specific init is done later in Driver load sequence.
v4:
- Use !!dev_priv->engine[VCS] style for the engine check in getparam. (Chris)
- Kill the superfluous init_engine_lists().
v5:
- Cleanup the intel_engines_init() & intel_engines_setup(), with respect to
allocation of intel_engine_cs structure. (Chris)
v6:
- Rebase.
v7:
- Optimize the for_each_engine_masked() macro. (Chris)
- Change the type of 'iter' local variable to enum intel_engine_id. (Chris)
- Rebase.
v8: Rebase.
v9: Rebase.
v10:
- For index calculation use engine ID instead of pointer based arithmetic in
intel_engine_sync_index() as engine pointers are not contiguous now (Chris)
- For appropriateness, rename local enum variable 'iter' to 'id'. (Joonas)
- Use for_each_engine macro for cleanup in intel_engines_init() and remove
check for NULL engine pointer in cleanup() routines. (Joonas)
v11: Rebase.
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1476378888-7372-1-git-send-email-akash.goel@intel.com
2016-10-14 00:14:48 +07:00
|
|
|
enum intel_engine_id id;
|
2019-06-21 14:07:52 +07:00
|
|
|
u32 ecochk;
|
2013-04-09 08:43:54 +07:00
|
|
|
|
2019-06-21 14:07:52 +07:00
|
|
|
intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);
|
2013-04-04 19:13:41 +07:00
|
|
|
|
2019-06-21 14:07:51 +07:00
|
|
|
ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
|
|
|
|
if (IS_HASWELL(i915)) {
|
2013-12-07 05:11:09 +07:00
|
|
|
ecochk |= ECOCHK_PPGTT_WB_HSW;
|
|
|
|
} else {
|
|
|
|
ecochk |= ECOCHK_PPGTT_LLC_IVB;
|
|
|
|
ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
|
|
|
|
}
|
2019-06-21 14:07:51 +07:00
|
|
|
intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
|
2013-04-04 19:13:41 +07:00
|
|
|
|
2019-06-21 14:07:51 +07:00
|
|
|
for_each_engine(engine, i915, id) {
|
2013-04-09 08:43:54 +07:00
|
|
|
/* GFX_MODE is per-ring on gen7+ */
|
2019-06-07 15:45:20 +07:00
|
|
|
ENGINE_WRITE(engine,
|
|
|
|
RING_MODE_GEN7,
|
|
|
|
_MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
|
2013-04-09 08:43:54 +07:00
|
|
|
}
|
2013-12-07 05:11:09 +07:00
|
|
|
}
|
2013-04-09 08:43:54 +07:00
|
|
|
|
2019-06-21 14:07:51 +07:00
|
|
|
static void gen6_ppgtt_enable(struct intel_gt *gt)
|
2013-12-07 05:11:09 +07:00
|
|
|
{
|
2019-06-21 14:07:51 +07:00
|
|
|
struct intel_uncore *uncore = gt->uncore;
|
2013-04-04 19:13:41 +07:00
|
|
|
|
2019-06-21 14:07:52 +07:00
|
|
|
intel_uncore_rmw(uncore,
|
|
|
|
GAC_ECO_BITS,
|
|
|
|
0,
|
|
|
|
ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);
|
2013-04-09 08:43:54 +07:00
|
|
|
|
2019-06-21 14:07:52 +07:00
|
|
|
intel_uncore_rmw(uncore,
|
|
|
|
GAB_CTL,
|
|
|
|
0,
|
|
|
|
GAB_CTL_CONT_AFTER_PAGEFAULT);
|
2013-12-07 05:11:09 +07:00
|
|
|
|
2019-06-21 14:07:52 +07:00
|
|
|
intel_uncore_rmw(uncore,
|
|
|
|
GAM_ECOCHK,
|
|
|
|
0,
|
|
|
|
ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
|
2013-12-07 05:11:09 +07:00
|
|
|
|
2019-06-21 14:07:51 +07:00
|
|
|
if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
|
|
|
|
intel_uncore_write(uncore,
|
|
|
|
GFX_MODE,
|
|
|
|
_MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
|
2013-04-09 08:43:54 +07:00
|
|
|
}
|
|
|
|
|
2012-02-09 23:15:46 +07:00
|
|
|
/* PPGTT support for Sandybdrige/Gen6 and later */
|
2013-07-17 06:50:05 +07:00
|
|
|
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
|
2017-02-15 15:43:46 +07:00
|
|
|
u64 start, u64 length)
|
2012-02-09 23:15:46 +07:00
|
|
|
{
|
2019-06-14 23:43:42 +07:00
|
|
|
struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
|
|
|
|
const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
|
2019-07-12 16:43:26 +07:00
|
|
|
const gen6_pte_t scratch_pte = vm->scratch[0].encode;
|
2017-02-15 15:43:46 +07:00
|
|
|
unsigned int pde = first_entry / GEN6_PTES;
|
|
|
|
unsigned int pte = first_entry % GEN6_PTES;
|
2018-09-18 00:14:14 +07:00
|
|
|
unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
|
2012-02-09 23:15:46 +07:00
|
|
|
|
2012-02-09 23:15:47 +07:00
|
|
|
while (num_entries) {
|
2019-06-14 23:43:42 +07:00
|
|
|
struct i915_page_table * const pt =
|
|
|
|
i915_pt_entry(ppgtt->base.pd, pde++);
|
2019-03-05 06:06:46 +07:00
|
|
|
const unsigned int count = min(num_entries, GEN6_PTES - pte);
|
2017-02-15 15:43:46 +07:00
|
|
|
gen6_pte_t *vaddr;
|
2012-02-09 23:15:47 +07:00
|
|
|
|
2019-07-12 16:43:26 +07:00
|
|
|
GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
|
2018-06-14 20:43:15 +07:00
|
|
|
|
|
|
|
num_entries -= count;
|
|
|
|
|
2019-06-14 23:43:42 +07:00
|
|
|
GEM_BUG_ON(count > atomic_read(&pt->used));
|
|
|
|
if (!atomic_sub_return(count, &pt->used))
|
2018-06-14 20:43:15 +07:00
|
|
|
ppgtt->scan_for_unused_pt = true;
|
2012-02-09 23:15:46 +07:00
|
|
|
|
2018-06-14 20:43:14 +07:00
|
|
|
/*
|
|
|
|
* Note that the hw doesn't support removing PDE on the fly
|
2017-02-15 15:43:46 +07:00
|
|
|
* (they are cached inside the context with no means to
|
|
|
|
* invalidate the cache), so we can only reset the PTE
|
|
|
|
* entries back to scratch.
|
|
|
|
*/
|
2012-02-09 23:15:46 +07:00
|
|
|
|
2017-02-15 15:43:46 +07:00
|
|
|
vaddr = kmap_atomic_px(pt);
|
2019-03-05 06:06:46 +07:00
|
|
|
memset32(vaddr + pte, scratch_pte, count);
|
2017-02-15 15:43:46 +07:00
|
|
|
kunmap_atomic(vaddr);
|
2012-02-09 23:15:46 +07:00
|
|
|
|
2017-02-15 15:43:46 +07:00
|
|
|
pte = 0;
|
2012-02-09 23:15:47 +07:00
|
|
|
}
|
2012-02-09 23:15:46 +07:00
|
|
|
}
|
|
|
|
|
2013-07-17 06:50:05 +07:00
|
|
|
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
|
2017-06-22 16:58:36 +07:00
|
|
|
struct i915_vma *vma,
|
2017-02-15 15:43:57 +07:00
|
|
|
enum i915_cache_level cache_level,
|
|
|
|
u32 flags)
|
2013-01-25 05:44:56 +07:00
|
|
|
{
|
2019-06-11 16:12:38 +07:00
|
|
|
struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
|
2019-06-14 23:43:42 +07:00
|
|
|
struct i915_page_directory * const pd = ppgtt->pd;
|
2018-09-18 00:14:14 +07:00
|
|
|
unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
|
2015-03-16 23:00:54 +07:00
|
|
|
unsigned act_pt = first_entry / GEN6_PTES;
|
|
|
|
unsigned act_pte = first_entry % GEN6_PTES;
|
2017-02-15 15:43:36 +07:00
|
|
|
const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
|
2017-11-07 04:11:28 +07:00
|
|
|
struct sgt_dma iter = sgt_dma(vma);
|
2017-02-15 15:43:36 +07:00
|
|
|
gen6_pte_t *vaddr;
|
|
|
|
|
2019-07-12 16:43:26 +07:00
|
|
|
GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
|
2018-06-14 20:43:15 +07:00
|
|
|
|
2019-06-14 23:43:42 +07:00
|
|
|
vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
|
2017-02-15 15:43:36 +07:00
|
|
|
do {
|
|
|
|
vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
|
2013-02-19 00:28:04 +07:00
|
|
|
|
2018-09-13 22:04:05 +07:00
|
|
|
iter.dma += I915_GTT_PAGE_SIZE;
|
2017-02-15 15:43:36 +07:00
|
|
|
if (iter.dma == iter.max) {
|
|
|
|
iter.sg = __sg_next(iter.sg);
|
|
|
|
if (!iter.sg)
|
|
|
|
break;
|
2013-02-19 00:28:04 +07:00
|
|
|
|
2017-02-15 15:43:36 +07:00
|
|
|
iter.dma = sg_dma_address(iter.sg);
|
|
|
|
iter.max = iter.dma + iter.sg->length;
|
|
|
|
}
|
2014-06-17 12:29:42 +07:00
|
|
|
|
2015-03-16 23:00:54 +07:00
|
|
|
if (++act_pte == GEN6_PTES) {
|
2017-02-15 15:43:41 +07:00
|
|
|
kunmap_atomic(vaddr);
|
2019-06-14 23:43:42 +07:00
|
|
|
vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
|
2013-02-19 00:28:04 +07:00
|
|
|
act_pte = 0;
|
2013-01-25 05:44:56 +07:00
|
|
|
}
|
2017-02-15 15:43:36 +07:00
|
|
|
} while (1);
|
2017-02-15 15:43:41 +07:00
|
|
|
kunmap_atomic(vaddr);
|
2017-10-07 05:18:27 +07:00
|
|
|
|
|
|
|
vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
|
2013-01-25 05:44:56 +07:00
|
|
|
}
|
|
|
|
|
2015-03-16 23:00:56 +07:00
|
|
|
/*
 * Ensure every page-directory entry covering @length bytes from @start in
 * @vm refers to a real page table rather than the scratch page table
 * (vm->scratch[1]), allocating page tables where needed, and add the
 * number of PTEs this range occupies in each table to that table's
 * pt->used counter.
 *
 * Returns 0 on success, or the error carried by alloc_pt() on failure. On
 * failure the part of the range already processed is handed to
 * gen6_ppgtt_clear_range() before returning.
 *
 * A runtime-pm wakeref is held for the duration of the call.
 */
static int gen6_alloc_va_range(struct i915_address_space *vm,
			       u64 start, u64 length)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	struct i915_page_directory * const pd = ppgtt->base.pd;
	/* @alloc holds a spare, already allocated page table (or NULL) */
	struct i915_page_table *pt, *alloc = NULL;
	intel_wakeref_t wakeref;
	/* Remember the original start for the unwind path below */
	u64 from = start;
	unsigned int pde;
	/* Set once a PDE has been written to hardware via gen6_write_pde() */
	bool flush = false;
	int ret = 0;

	wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);

	spin_lock(&pd->lock);
	/*
	 * NOTE(review): the unwind path relies on `start` having moved past
	 * the entries already handled, so gen6_for_each_pde() presumably
	 * advances start/length as it iterates -- confirm against the macro.
	 */
	gen6_for_each_pde(pt, pd, start, length, pde) {
		const unsigned int count = gen6_pte_count(start, length);

		if (px_base(pt) == px_base(&vm->scratch[1])) {
			/*
			 * The slot still points at scratch: a real page
			 * table is needed. The lock is dropped while one is
			 * obtained and initialised (presumably because the
			 * allocation may sleep -- confirm).
			 */
			spin_unlock(&pd->lock);

			/* Reuse a spare left by an earlier iteration, if any */
			pt = fetch_and_zero(&alloc);
			if (!pt)
				pt = alloc_pt(vm);
			if (IS_ERR(pt)) {
				ret = PTR_ERR(pt);
				/* pd->lock is not held on this path */
				goto unwind_out;
			}

			/* Start with every PTE pointing at the scratch page */
			fill32_px(pt, vm->scratch[0].encode);

			spin_lock(&pd->lock);
			/*
			 * Recheck under the lock: the slot may have been
			 * filled in while the lock was dropped.
			 */
			if (pd->entry[pde] == &vm->scratch[1]) {
				pd->entry[pde] = pt;
				/*
				 * Only write the PDE to hardware when
				 * ppgtt->vma is bound with
				 * I915_VMA_GLOBAL_BIND.
				 */
				if (i915_vma_is_bound(ppgtt->vma,
						      I915_VMA_GLOBAL_BIND)) {
					gen6_write_pde(ppgtt, pde, pt);
					flush = true;
				}
			} else {
				/*
				 * The slot is already populated: keep our
				 * page table as a spare and use the one
				 * that is installed.
				 */
				alloc = pt;
				pt = pd->entry[pde];
			}
		}

		atomic_add(count, &pt->used);
	}
	spin_unlock(&pd->lock);

	/* At least one PDE was written above; invalidate once, outside the lock */
	if (flush)
		gen6_ggtt_invalidate(vm->gt->ggtt);

	goto out;

unwind_out:
	/* Undo the accounting for the portion handled before the failure */
	gen6_ppgtt_clear_range(vm, from, start - from);
out:
	/* Release a spare page table that ended up unused */
	if (alloc)
		free_px(vm, alloc);
	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
	return ret;
}
|
|
|
|
|
2019-06-11 16:12:38 +07:00
|
|
|
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
|
2015-06-30 22:16:40 +07:00
|
|
|
{
|
2018-06-12 19:04:46 +07:00
|
|
|
struct i915_address_space * const vm = &ppgtt->base.vm;
|
2019-06-14 23:43:42 +07:00
|
|
|
struct i915_page_directory * const pd = ppgtt->base.pd;
|
2016-08-22 14:44:30 +07:00
|
|
|
int ret;
|
2015-06-30 22:16:40 +07:00
|
|
|
|
2018-05-22 15:36:43 +07:00
|
|
|
ret = setup_scratch_page(vm, __GFP_HIGHMEM);
|
2016-08-22 14:44:30 +07:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2015-06-30 22:16:40 +07:00
|
|
|
|
2019-07-12 16:43:26 +07:00
|
|
|
vm->scratch[0].encode =
|
|
|
|
vm->pte_encode(px_dma(&vm->scratch[0]),
|
|
|
|
I915_CACHE_NONE, PTE_READ_ONLY);
|
2018-06-15 01:42:17 +07:00
|
|
|
|
2019-07-12 16:43:26 +07:00
|
|
|
if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
|
2017-02-15 15:43:40 +07:00
|
|
|
cleanup_scratch_page(vm);
|
2019-07-12 14:58:18 +07:00
|
|
|
return -ENOMEM;
|
2015-06-30 22:16:40 +07:00
|
|
|
}
|
2019-06-14 23:43:42 +07:00
|
|
|
|
2019-07-12 16:43:26 +07:00
|
|
|
fill32_px(&vm->scratch[1], vm->scratch[0].encode);
|
|
|
|
memset_p(pd->entry, &vm->scratch[1], I915_PDES);
|
2015-06-30 22:16:40 +07:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-06-11 16:12:38 +07:00
|
|
|
static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
|
2014-02-20 13:05:48 +07:00
|
|
|
{
|
2019-06-14 23:43:42 +07:00
|
|
|
struct i915_page_directory * const pd = ppgtt->base.pd;
|
2019-07-12 16:43:26 +07:00
|
|
|
struct i915_page_dma * const scratch =
|
|
|
|
px_base(&ppgtt->base.vm.scratch[1]);
|
2015-04-08 18:13:30 +07:00
|
|
|
struct i915_page_table *pt;
|
2017-02-15 15:43:57 +07:00
|
|
|
u32 pde;
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 22:46:22 +07:00
|
|
|
|
2019-06-14 23:43:42 +07:00
|
|
|
gen6_for_all_pdes(pt, pd, pde)
|
2019-07-12 16:43:26 +07:00
|
|
|
if (px_base(pt) != scratch)
|
2019-07-12 16:43:22 +07:00
|
|
|
free_px(&ppgtt->base.vm, pt);
|
2018-06-12 15:18:15 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
|
|
|
|
{
|
2019-06-11 16:12:38 +07:00
|
|
|
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
|
drm/i915: Create page table allocators
As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks. This makes the
code easier to write, read, and verify.
Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,
The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain additional complexity as a
result.
This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.
2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).
3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.
4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.
v2: Updated commit message to explain why this patch exists
v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/
v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel)
v5: Added additional safety checks in gen8 clear/free/unmap.
v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika).
v7: Make err_out loop symmetrical to the way we allocate in
alloc_pt_range. Also s/page_tables/page_table and correct commit
message (Mika)
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-02-24 23:22:36 +07:00
|
|
|
|
2019-06-21 01:37:05 +07:00
|
|
|
i915_vma_destroy(ppgtt->vma);
|
2018-06-12 15:18:15 +07:00
|
|
|
|
|
|
|
gen6_ppgtt_free_pd(ppgtt);
|
2019-07-12 14:58:18 +07:00
|
|
|
free_scratch(vm);
|
2019-09-13 13:42:00 +07:00
|
|
|
|
|
|
|
mutex_destroy(&ppgtt->pin_mutex);
|
2019-06-14 23:43:42 +07:00
|
|
|
kfree(ppgtt->base.pd);
|
2013-01-25 04:49:56 +07:00
|
|
|
}
|
|
|
|
|
2018-06-12 19:04:46 +07:00
|
|
|
static int pd_vma_set_pages(struct i915_vma *vma)
|
2013-01-25 04:49:56 +07:00
|
|
|
{
|
2018-06-12 19:04:46 +07:00
|
|
|
vma->pages = ERR_PTR(-ENODEV);
|
|
|
|
return 0;
|
|
|
|
}
|
2012-02-09 23:15:46 +07:00
|
|
|
|
2018-06-12 19:04:46 +07:00
|
|
|
static void pd_vma_clear_pages(struct i915_vma *vma)
|
|
|
|
{
|
|
|
|
GEM_BUG_ON(!vma->pages);
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 22:46:22 +07:00
|
|
|
|
2018-06-12 19:04:46 +07:00
|
|
|
vma->pages = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int pd_vma_bind(struct i915_vma *vma,
|
|
|
|
enum i915_cache_level cache_level,
|
|
|
|
u32 unused)
|
|
|
|
{
|
|
|
|
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
|
2019-06-11 16:12:38 +07:00
|
|
|
struct gen6_ppgtt *ppgtt = vma->private;
|
2018-09-13 22:04:05 +07:00
|
|
|
u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
|
2018-06-12 19:04:46 +07:00
|
|
|
struct i915_page_table *pt;
|
|
|
|
unsigned int pde;
|
2015-03-16 23:00:56 +07:00
|
|
|
|
2019-07-12 16:43:22 +07:00
|
|
|
px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
|
2018-06-12 19:04:46 +07:00
|
|
|
ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
|
2012-02-09 23:15:46 +07:00
|
|
|
|
2019-06-14 23:43:42 +07:00
|
|
|
gen6_for_all_pdes(pt, ppgtt->base.pd, pde)
|
2018-06-12 19:04:46 +07:00
|
|
|
gen6_write_pde(ppgtt, pde, pt);
|
2017-02-15 15:43:43 +07:00
|
|
|
|
2019-06-21 14:07:58 +07:00
|
|
|
gen6_ggtt_invalidate(ggtt);
|
2017-02-15 15:43:43 +07:00
|
|
|
|
2015-01-23 00:01:25 +07:00
|
|
|
return 0;
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 22:46:22 +07:00
|
|
|
}
|
2015-02-24 23:22:37 +07:00
|
|
|
|
2018-06-12 19:04:46 +07:00
|
|
|
static void pd_vma_unbind(struct i915_vma *vma)
|
drm/i915: Finish gen6/7 dynamic page table allocation
This patch continues on the idea from "Track GEN6 page table usage".
From here on, in the steady state, PDEs are all pointing to the scratch
page table (as recommended in the spec). When an object is allocated in
the VA range, the code will determine if we need to allocate a page for
the page table. Similarly when the object is destroyed, we will remove,
and free the page table pointing the PDE back to the scratch page.
Following patches will work to unify the code a bit as we bring in GEN8
support. GEN6 and GEN8 are different enough that I had a hard time to
get to this point with as much common code as I do.
The aliasing PPGTT must pre-allocate all of the page tables. There are a
few reasons for this. Two trivial ones: aliasing ppgtt goes through the
ggtt paths, so it's hard to maintain, we currently do not restore the
default context (assuming the previous force reload is indeed
necessary). Most importantly though, the only way (it seems from
empirical evidence) to invalidate the CS TLBs on non-render ring is to
either use ring sync (which requires actually stopping the rings in
order to synchronize when the sync completes vs. where you are in
execution), or to reload DCLV. Since without full PPGTT we do not ever
reload the DCLV register, there is no good way to achieve this. The
simplest solution is just to not support dynamic page table
creation/destruction in the aliasing PPGTT.
We could always reload DCLV, but this seems like quite a bit of excess
overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
page tables.
v2: Make the page table bitmap declared inside the function (Chris)
Simplify the way scratching address space works.
Move the alloc/teardown tracepoints up a level in the call stack so that
both all implementations get the trace.
v3: Updated trace event to spit out a name
v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check
for trace, as it is no longer possible after the PPGTT cleanup patch series
of a couple of months ago (Daniel).
v6: Implement changes from code review (Daniel):
- allocate/teardown_va_range calls added.
- Add a scratch page allocation helper (only need the address).
- Move trace events to a new patch.
- Use updated mark_tlbs_dirty.
- Moved pt preallocation for aliasing ppgtt into gen6_ppgtt_init.
v7: teardown_va_range removed (Daniel).
In init, gen6_ppgtt_clear_range call is only needed for aliasing ppgtt.
v8: Rebase after s/page_tables/page_table/.
v9: Remove unnecessary scratch flag in page_table struct, future patches
can just compare against ppgtt->scratch_pt, and alloc_pt_scratch becomes
redundant. Initialize scratch_pt and pt. (Mika)
v10: Clean up aliasing ppgtt init error path and prevent leaking the
ppgtt obj when init fails. (Mika)
Updated commit author. (Daniel)
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v4+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-03-24 22:46:22 +07:00
|
|
|
{
|
2019-06-11 16:12:38 +07:00
|
|
|
struct gen6_ppgtt *ppgtt = vma->private;
|
2019-06-14 23:43:42 +07:00
|
|
|
struct i915_page_directory * const pd = ppgtt->base.pd;
|
2019-07-12 16:43:26 +07:00
|
|
|
struct i915_page_dma * const scratch =
|
|
|
|
px_base(&ppgtt->base.vm.scratch[1]);
|
2018-06-14 20:43:15 +07:00
|
|
|
struct i915_page_table *pt;
|
|
|
|
unsigned int pde;
|
|
|
|
|
|
|
|
if (!ppgtt->scan_for_unused_pt)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Free all no longer used page tables */
|
2019-06-14 23:43:42 +07:00
|
|
|
gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
|
2019-07-12 14:58:18 +07:00
|
|
|
if (px_base(pt) == scratch || atomic_read(&pt->used))
|
2018-06-14 20:43:15 +07:00
|
|
|
continue;
|
|
|
|
|
2019-07-12 16:43:22 +07:00
|
|
|
free_px(&ppgtt->base.vm, pt);
|
2019-07-12 14:58:18 +07:00
|
|
|
pd->entry[pde] = scratch;
|
2018-06-14 20:43:15 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
ppgtt->scan_for_unused_pt = false;
|
2018-06-12 19:04:46 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static const struct i915_vma_ops pd_vma_ops = {
|
|
|
|
.set_pages = pd_vma_set_pages,
|
|
|
|
.clear_pages = pd_vma_clear_pages,
|
|
|
|
.bind_vma = pd_vma_bind,
|
|
|
|
.unbind_vma = pd_vma_unbind,
|
|
|
|
};
|
|
|
|
|
2019-06-11 16:12:38 +07:00
|
|
|
static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
|
2018-06-12 19:04:46 +07:00
|
|
|
{
|
2019-06-21 14:08:08 +07:00
|
|
|
struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
|
2018-06-12 19:04:46 +07:00
|
|
|
struct i915_vma *vma;
|
|
|
|
|
|
|
|
GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
|
|
|
|
GEM_BUG_ON(size > ggtt->vm.total);
|
|
|
|
|
2019-02-28 17:20:34 +07:00
|
|
|
vma = i915_vma_alloc();
|
2018-06-12 19:04:46 +07:00
|
|
|
if (!vma)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
2019-10-04 20:40:00 +07:00
|
|
|
i915_active_init(&vma->active, NULL, NULL);
|
2018-06-12 19:04:46 +07:00
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
mutex_init(&vma->pages_mutex);
|
|
|
|
vma->vm = i915_vm_get(&ggtt->vm);
|
2018-06-12 19:04:46 +07:00
|
|
|
vma->ops = &pd_vma_ops;
|
|
|
|
vma->private = ppgtt;
|
|
|
|
|
|
|
|
vma->size = size;
|
|
|
|
vma->fence_size = size;
|
2019-09-11 16:02:43 +07:00
|
|
|
atomic_set(&vma->flags, I915_VMA_GGTT);
|
2018-06-12 19:04:46 +07:00
|
|
|
vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
|
|
|
|
|
|
|
|
INIT_LIST_HEAD(&vma->obj_link);
|
2019-06-06 18:23:20 +07:00
|
|
|
INIT_LIST_HEAD(&vma->closed_link);
|
2019-01-28 17:23:53 +07:00
|
|
|
|
2018-06-12 19:04:46 +07:00
|
|
|
return vma;
|
|
|
|
}
|
2012-02-09 23:15:46 +07:00
|
|
|
|
2019-06-11 16:12:38 +07:00
|
|
|
int gen6_ppgtt_pin(struct i915_ppgtt *base)
|
2018-06-12 19:04:46 +07:00
|
|
|
{
|
2019-06-11 16:12:38 +07:00
|
|
|
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
|
2019-09-13 13:42:00 +07:00
|
|
|
int err = 0;
|
2018-06-12 19:04:46 +07:00
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
|
2019-03-22 16:23:23 +07:00
|
|
|
|
2018-06-14 16:41:03 +07:00
|
|
|
/*
|
|
|
|
* Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
|
|
|
|
* which will be pinned into every active context.
|
|
|
|
* (When vma->pin_count becomes atomic, I expect we will naturally
|
|
|
|
* need a larger, unpacked, type and kill this redundancy.)
|
|
|
|
*/
|
2019-09-13 13:42:00 +07:00
|
|
|
if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
|
2018-06-14 16:41:03 +07:00
|
|
|
return 0;
|
|
|
|
|
2019-09-13 13:42:00 +07:00
|
|
|
if (mutex_lock_interruptible(&ppgtt->pin_mutex))
|
|
|
|
return -EINTR;
|
|
|
|
|
2018-06-12 19:04:46 +07:00
|
|
|
/*
|
|
|
|
* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
|
|
|
|
* allocator works in address space sizes, so it's multiplied by page
|
|
|
|
* size. We allocate at the top of the GTT to avoid fragmentation.
|
|
|
|
*/
|
2019-09-13 13:42:00 +07:00
|
|
|
if (!atomic_read(&ppgtt->pin_count)) {
|
|
|
|
err = i915_vma_pin(ppgtt->vma,
|
|
|
|
0, GEN6_PD_ALIGN,
|
|
|
|
PIN_GLOBAL | PIN_HIGH);
|
|
|
|
}
|
|
|
|
if (!err)
|
|
|
|
atomic_inc(&ppgtt->pin_count);
|
|
|
|
mutex_unlock(&ppgtt->pin_mutex);
|
2018-12-22 10:06:23 +07:00
|
|
|
|
|
|
|
return err;
|
2014-02-20 13:05:49 +07:00
|
|
|
}
|
|
|
|
|
2019-06-11 16:12:38 +07:00
|
|
|
void gen6_ppgtt_unpin(struct i915_ppgtt *base)
|
2018-06-14 16:41:03 +07:00
|
|
|
{
|
2019-06-11 16:12:38 +07:00
|
|
|
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
|
2018-06-14 16:41:03 +07:00
|
|
|
|
2019-09-13 13:42:00 +07:00
|
|
|
GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
|
|
|
|
if (atomic_dec_and_test(&ppgtt->pin_count))
|
|
|
|
i915_vma_unpin(ppgtt->vma);
|
2018-06-14 16:41:03 +07:00
|
|
|
}
|
|
|
|
|
2019-06-11 16:12:38 +07:00
|
|
|
void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
|
2019-03-22 16:23:23 +07:00
|
|
|
{
|
2019-06-11 16:12:38 +07:00
|
|
|
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
|
2019-03-22 16:23:23 +07:00
|
|
|
|
2019-09-13 13:42:00 +07:00
|
|
|
if (!atomic_read(&ppgtt->pin_count))
|
2019-03-22 16:23:23 +07:00
|
|
|
return;
|
|
|
|
|
|
|
|
i915_vma_unpin(ppgtt->vma);
|
2019-09-13 13:42:00 +07:00
|
|
|
atomic_set(&ppgtt->pin_count, 0);
|
2019-03-22 16:23:23 +07:00
|
|
|
}
|
|
|
|
|
2019-06-11 16:12:38 +07:00
|
|
|
static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
|
2014-02-20 13:05:49 +07:00
|
|
|
{
|
2018-06-07 23:30:40 +07:00
|
|
|
struct i915_ggtt * const ggtt = &i915->ggtt;
|
2019-06-11 16:12:38 +07:00
|
|
|
struct gen6_ppgtt *ppgtt;
|
2018-06-07 23:30:40 +07:00
|
|
|
int err;
|
|
|
|
|
|
|
|
ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
|
|
|
|
if (!ppgtt)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
2019-09-13 13:42:00 +07:00
|
|
|
mutex_init(&ppgtt->pin_mutex);
|
|
|
|
|
2019-06-21 14:07:59 +07:00
|
|
|
ppgtt_init(&ppgtt->base, &i915->gt);
|
2019-07-12 16:43:24 +07:00
|
|
|
ppgtt->base.vm.top = 1;
|
2018-07-05 01:55:18 +07:00
|
|
|
|
2019-10-04 20:39:57 +07:00
|
|
|
ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
|
2018-06-14 20:43:14 +07:00
|
|
|
ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
|
2018-06-12 15:18:14 +07:00
|
|
|
ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
|
|
|
|
ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
|
|
|
|
ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
|
2017-02-28 22:28:11 +07:00
|
|
|
|
2018-06-12 15:18:15 +07:00
|
|
|
ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
|
|
|
|
|
2019-07-12 18:27:22 +07:00
|
|
|
ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
|
2019-06-14 23:43:42 +07:00
|
|
|
if (!ppgtt->base.pd) {
|
|
|
|
err = -ENOMEM;
|
2019-06-21 01:37:05 +07:00
|
|
|
goto err_free;
|
2019-06-14 23:43:42 +07:00
|
|
|
}
|
|
|
|
|
2018-06-12 19:04:46 +07:00
|
|
|
err = gen6_ppgtt_init_scratch(ppgtt);
|
2018-06-12 15:18:15 +07:00
|
|
|
if (err)
|
2019-06-14 23:43:42 +07:00
|
|
|
goto err_pd;
|
2018-06-12 15:18:15 +07:00
|
|
|
|
2018-06-12 19:04:46 +07:00
|
|
|
ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
|
|
|
|
if (IS_ERR(ppgtt->vma)) {
|
|
|
|
err = PTR_ERR(ppgtt->vma);
|
2018-06-12 15:18:15 +07:00
|
|
|
goto err_scratch;
|
2018-06-12 19:04:46 +07:00
|
|
|
}
|
2018-06-12 15:18:15 +07:00
|
|
|
|
2018-06-12 15:18:14 +07:00
|
|
|
return &ppgtt->base;
|
2013-01-25 04:49:56 +07:00
|
|
|
|
2018-06-12 15:18:15 +07:00
|
|
|
err_scratch:
|
2019-07-12 14:58:18 +07:00
|
|
|
free_scratch(&ppgtt->base.vm);
|
2019-06-14 23:43:42 +07:00
|
|
|
err_pd:
|
|
|
|
kfree(ppgtt->base.pd);
|
2018-06-07 23:30:40 +07:00
|
|
|
err_free:
|
|
|
|
kfree(ppgtt);
|
|
|
|
return ERR_PTR(err);
|
2014-08-07 01:19:54 +07:00
|
|
|
}
|
2015-06-25 22:35:13 +07:00
|
|
|
|
2019-06-21 14:07:51 +07:00
|
|
|
static void gtt_write_workarounds(struct intel_gt *gt)
|
2016-02-04 18:49:34 +07:00
|
|
|
{
|
2019-06-21 14:07:51 +07:00
|
|
|
struct drm_i915_private *i915 = gt->i915;
|
|
|
|
struct intel_uncore *uncore = gt->uncore;
|
|
|
|
|
2016-02-04 18:49:34 +07:00
|
|
|
/* This function is for gtt related workarounds. This function is
|
|
|
|
* called on driver load and after a GPU reset, so you can place
|
|
|
|
* workarounds here even if they get overwritten by GPU reset.
|
|
|
|
*/
|
2018-05-09 04:29:23 +07:00
|
|
|
/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
|
2019-06-21 14:07:51 +07:00
|
|
|
if (IS_BROADWELL(i915))
|
|
|
|
intel_uncore_write(uncore,
|
|
|
|
GEN8_L3_LRA_1_GPGPU,
|
|
|
|
GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
|
|
|
|
else if (IS_CHERRYVIEW(i915))
|
|
|
|
intel_uncore_write(uncore,
|
|
|
|
GEN8_L3_LRA_1_GPGPU,
|
|
|
|
GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
|
|
|
|
else if (IS_GEN9_LP(i915))
|
|
|
|
intel_uncore_write(uncore,
|
|
|
|
GEN8_L3_LRA_1_GPGPU,
|
|
|
|
GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
|
2019-08-23 15:20:48 +07:00
|
|
|
else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
|
2019-06-21 14:07:51 +07:00
|
|
|
intel_uncore_write(uncore,
|
|
|
|
GEN8_L3_LRA_1_GPGPU,
|
|
|
|
GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
|
2017-10-07 05:18:22 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* To support 64K PTEs we need to first enable the use of the
|
|
|
|
* Intermediate-Page-Size(IPS) bit of the PDE field via some magical
|
|
|
|
* mmio, otherwise the page-walker will simply ignore the IPS bit. This
|
|
|
|
* shouldn't be needed after GEN10.
|
|
|
|
*
|
|
|
|
* 64K pages were first introduced from BDW+, although technically they
|
|
|
|
* only *work* from gen9+. For pre-BDW we instead have the option for
|
|
|
|
* 32K pages, but we don't currently have any support for it in our
|
|
|
|
* driver.
|
|
|
|
*/
|
2019-06-21 14:07:51 +07:00
|
|
|
if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
|
|
|
|
INTEL_GEN(i915) <= 10)
|
2019-06-21 14:07:52 +07:00
|
|
|
intel_uncore_rmw(uncore,
|
|
|
|
GEN8_GAMW_ECO_DEV_RW_IA,
|
|
|
|
0,
|
|
|
|
GAMW_ECO_ENABLE_64K_IPS_FIELD);
|
2019-08-10 02:34:55 +07:00
|
|
|
|
|
|
|
if (IS_GEN_RANGE(i915, 8, 11)) {
|
|
|
|
bool can_use_gtt_cache = true;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* According to the BSpec if we use 2M/1G pages then we also
|
|
|
|
* need to disable the GTT cache. At least on BDW we can see
|
|
|
|
* visual corruption when using 2M pages, and not disabling the
|
|
|
|
* GTT cache.
|
|
|
|
*/
|
|
|
|
if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
|
|
|
|
can_use_gtt_cache = false;
|
|
|
|
|
|
|
|
/* WaGttCachingOffByDefault */
|
|
|
|
intel_uncore_write(uncore,
|
|
|
|
HSW_GTT_CACHE_EN,
|
|
|
|
can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
|
|
|
|
WARN_ON_ONCE(can_use_gtt_cache &&
|
|
|
|
intel_uncore_read(uncore,
|
|
|
|
HSW_GTT_CACHE_EN) == 0);
|
|
|
|
}
|
2016-02-04 18:49:34 +07:00
|
|
|
}
|
|
|
|
|
2019-06-21 14:07:51 +07:00
|
|
|
int i915_ppgtt_init_hw(struct intel_gt *gt)
|
2014-08-07 01:19:53 +07:00
|
|
|
{
|
2019-06-21 14:07:51 +07:00
|
|
|
struct drm_i915_private *i915 = gt->i915;
|
|
|
|
|
|
|
|
gtt_write_workarounds(gt);
|
2016-02-04 18:49:34 +07:00
|
|
|
|
2019-06-21 14:07:51 +07:00
|
|
|
if (IS_GEN(i915, 6))
|
|
|
|
gen6_ppgtt_enable(gt);
|
|
|
|
else if (IS_GEN(i915, 7))
|
|
|
|
gen7_ppgtt_enable(gt);
|
2014-08-07 01:19:53 +07:00
|
|
|
|
2015-06-18 19:11:20 +07:00
|
|
|
return 0;
|
|
|
|
}
|
2012-02-09 23:15:46 +07:00
|
|
|
|
2019-06-11 16:12:38 +07:00
|
|
|
/*
 * Dispatch PPGTT creation by hardware generation: gen8 and later use
 * gen8_ppgtt_create(), everything older uses gen6_ppgtt_create().
 * The result of the chosen constructor is returned unmodified.
 */
static struct i915_ppgtt *
__ppgtt_create(struct drm_i915_private *i915)
{
	if (INTEL_GEN(i915) >= 8)
		return gen8_ppgtt_create(i915);

	return gen6_ppgtt_create(i915);
}
|
|
|
|
|
2019-06-11 16:12:38 +07:00
|
|
|
struct i915_ppgtt *
|
2019-03-21 21:07:08 +07:00
|
|
|
i915_ppgtt_create(struct drm_i915_private *i915)
|
2014-08-06 20:04:47 +07:00
|
|
|
{
|
2019-06-11 16:12:38 +07:00
|
|
|
struct i915_ppgtt *ppgtt;
|
2014-08-06 20:04:47 +07:00
|
|
|
|
2019-06-11 16:12:38 +07:00
|
|
|
ppgtt = __ppgtt_create(i915);
|
2018-06-07 23:30:40 +07:00
|
|
|
if (IS_ERR(ppgtt))
|
|
|
|
return ppgtt;
|
2014-08-06 20:04:47 +07:00
|
|
|
|
2018-06-05 22:37:58 +07:00
|
|
|
trace_i915_ppgtt_create(&ppgtt->vm);
|
2014-11-10 20:44:31 +07:00
|
|
|
|
2014-08-06 20:04:47 +07:00
|
|
|
return ppgtt;
|
|
|
|
}
|
|
|
|
|
2013-01-19 03:30:31 +07:00
|
|
|
/* Certain Gen5 chipsets require require idling the GPU before
|
|
|
|
* unmapping anything from the GTT when VT-d is enabled.
|
|
|
|
*/
|
2016-08-04 13:52:22 +07:00
|
|
|
static bool needs_idle_maps(struct drm_i915_private *dev_priv)
|
2013-01-19 03:30:31 +07:00
|
|
|
{
|
|
|
|
/* Query intel_iommu to see if we need the workaround. Presumably that
|
|
|
|
* was loaded first.
|
|
|
|
*/
|
drm/i915: replace IS_GEN<N> with IS_GEN(..., N)
Define IS_GEN() similarly to our IS_GEN_RANGE(). but use gen instead of
gen_mask to do the comparison. Now callers can pass then gen as a parameter,
so we don't require one macro for each gen.
The following spatch was used to convert the users of these macros:
@@
expression e;
@@
(
- IS_GEN2(e)
+ IS_GEN(e, 2)
|
- IS_GEN3(e)
+ IS_GEN(e, 3)
|
- IS_GEN4(e)
+ IS_GEN(e, 4)
|
- IS_GEN5(e)
+ IS_GEN(e, 5)
|
- IS_GEN6(e)
+ IS_GEN(e, 6)
|
- IS_GEN7(e)
+ IS_GEN(e, 7)
|
- IS_GEN8(e)
+ IS_GEN(e, 8)
|
- IS_GEN9(e)
+ IS_GEN(e, 9)
|
- IS_GEN10(e)
+ IS_GEN(e, 10)
|
- IS_GEN11(e)
+ IS_GEN(e, 11)
)
v2: use IS_GEN rather than GT_GEN and compare to info.gen rather than
using the bitmask
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181212181044.15886-2-lucas.demarchi@intel.com
2018-12-13 01:10:43 +07:00
|
|
|
return IS_GEN(dev_priv, 5) && IS_MOBILE(dev_priv) && intel_vtd_active();
|
2013-01-19 03:30:31 +07:00
|
|
|
}
|
|
|
|
|
2019-06-21 14:08:00 +07:00
|
|
|
static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
|
2013-10-16 23:21:30 +07:00
|
|
|
{
|
2019-06-21 14:08:00 +07:00
|
|
|
struct drm_i915_private *i915 = ggtt->vm.i915;
|
2013-10-16 23:21:30 +07:00
|
|
|
|
|
|
|
/* Don't bother messing with faults pre GEN6 as we have little
|
|
|
|
* documentation supporting that it's a good idea.
|
|
|
|
*/
|
2019-06-21 14:08:00 +07:00
|
|
|
if (INTEL_GEN(i915) < 6)
|
2013-10-16 23:21:30 +07:00
|
|
|
return;
|
|
|
|
|
2019-06-21 14:08:00 +07:00
|
|
|
intel_gt_check_and_clear_faults(ggtt->vm.gt);
|
2013-10-16 23:21:30 +07:00
|
|
|
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
|
2014-09-25 16:13:12 +07:00
|
|
|
|
2019-06-21 14:07:58 +07:00
|
|
|
ggtt->invalidate(ggtt);
|
2013-10-16 23:21:30 +07:00
|
|
|
}
|
|
|
|
|
2019-06-21 14:08:00 +07:00
|
|
|
void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
|
|
|
|
{
|
|
|
|
ggtt_suspend_mappings(&i915->ggtt);
|
|
|
|
}
|
|
|
|
|
2016-10-28 19:58:36 +07:00
|
|
|
int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
|
|
|
|
struct sg_table *pages)
|
2010-11-06 16:10:47 +07:00
|
|
|
{
|
2017-01-06 22:22:39 +07:00
|
|
|
do {
|
2018-01-04 23:38:42 +07:00
|
|
|
if (dma_map_sg_attrs(&obj->base.dev->pdev->dev,
|
|
|
|
pages->sgl, pages->nents,
|
|
|
|
PCI_DMA_BIDIRECTIONAL,
|
|
|
|
DMA_ATTR_NO_WARN))
|
2017-01-06 22:22:39 +07:00
|
|
|
return 0;
|
|
|
|
|
2019-01-07 18:54:24 +07:00
|
|
|
/*
|
|
|
|
* If the DMA remap fails, one cause can be that we have
|
2017-01-06 22:22:39 +07:00
|
|
|
* too many objects pinned in a small remapping table,
|
|
|
|
* such as swiotlb. Incrementally purge all other objects and
|
|
|
|
* try again - if there are no more pages to remove from
|
|
|
|
* the DMA remapper, i915_gem_shrink will return 0.
|
|
|
|
*/
|
|
|
|
GEM_BUG_ON(obj->mm.pages == pages);
|
|
|
|
} while (i915_gem_shrink(to_i915(obj->base.dev),
|
2017-09-07 06:19:30 +07:00
|
|
|
obj->base.size >> PAGE_SHIFT, NULL,
|
2017-01-06 22:22:39 +07:00
|
|
|
I915_SHRINK_BOUND |
|
2019-01-07 18:54:24 +07:00
|
|
|
I915_SHRINK_UNBOUND));
|
2012-06-01 21:20:22 +07:00
|
|
|
|
2016-10-28 19:58:36 +07:00
|
|
|
return -ENOSPC;
|
2010-11-06 16:10:47 +07:00
|
|
|
}
|
|
|
|
|
2015-04-14 22:35:26 +07:00
|
|
|
/* Store the 64-bit PTE value @pte at the I/O-mapped location @addr. */
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
	writeq(pte, addr);
}
|
|
|
|
|
2016-06-10 15:52:59 +07:00
|
|
|
static void gen8_ggtt_insert_page(struct i915_address_space *vm,
|
|
|
|
dma_addr_t addr,
|
2017-02-15 15:43:57 +07:00
|
|
|
u64 offset,
|
2016-06-10 15:52:59 +07:00
|
|
|
enum i915_cache_level level,
|
|
|
|
u32 unused)
|
|
|
|
{
|
2017-01-12 18:00:49 +07:00
|
|
|
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
|
2016-06-10 15:52:59 +07:00
|
|
|
gen8_pte_t __iomem *pte =
|
2018-09-18 00:14:14 +07:00
|
|
|
(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
|
2016-06-10 15:52:59 +07:00
|
|
|
|
2018-07-13 01:53:10 +07:00
|
|
|
gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
|
2016-06-10 15:52:59 +07:00
|
|
|
|
2019-06-21 14:07:58 +07:00
|
|
|
ggtt->invalidate(ggtt);
|
2016-06-10 15:52:59 +07:00
|
|
|
}
|
|
|
|
|
2013-11-03 11:07:18 +07:00
|
|
|
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
|
2017-06-22 16:58:36 +07:00
|
|
|
struct i915_vma *vma,
|
2017-02-15 15:43:57 +07:00
|
|
|
enum i915_cache_level level,
|
2018-07-13 01:53:11 +07:00
|
|
|
u32 flags)
|
2013-11-03 11:07:18 +07:00
|
|
|
{
|
2016-04-28 15:56:38 +07:00
|
|
|
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
|
2016-05-20 17:54:06 +07:00
|
|
|
struct sgt_iter sgt_iter;
|
|
|
|
gen8_pte_t __iomem *gtt_entries;
|
2018-07-13 01:53:10 +07:00
|
|
|
const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
|
2016-05-20 17:54:06 +07:00
|
|
|
dma_addr_t addr;
|
2015-12-16 01:10:38 +07:00
|
|
|
|
2018-07-13 01:53:13 +07:00
|
|
|
/*
|
|
|
|
* Note that we ignore PTE_READ_ONLY here. The caller must be careful
|
|
|
|
* not to allow the user to override access to a read only page.
|
|
|
|
*/
|
2018-07-13 01:53:11 +07:00
|
|
|
|
2017-02-15 15:43:37 +07:00
|
|
|
gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
|
2018-09-18 00:14:14 +07:00
|
|
|
gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
|
2019-08-30 03:19:19 +07:00
|
|
|
for_each_sgt_daddr(addr, sgt_iter, vma->pages)
|
2017-02-15 15:43:37 +07:00
|
|
|
gen8_set_pte(gtt_entries++, pte_encode | addr);
|
2016-05-20 17:54:06 +07:00
|
|
|
|
2018-05-08 19:41:54 +07:00
|
|
|
/*
|
|
|
|
* We want to flush the TLBs only after we're certain all the PTE
|
|
|
|
* updates have finished.
|
2013-11-03 11:07:18 +07:00
|
|
|
*/
|
2019-06-21 14:07:58 +07:00
|
|
|
ggtt->invalidate(ggtt);
|
2013-11-03 11:07:18 +07:00
|
|
|
}
|
|
|
|
|
2016-06-10 15:52:59 +07:00
|
|
|
static void gen6_ggtt_insert_page(struct i915_address_space *vm,
|
|
|
|
dma_addr_t addr,
|
2017-02-15 15:43:57 +07:00
|
|
|
u64 offset,
|
2016-06-10 15:52:59 +07:00
|
|
|
enum i915_cache_level level,
|
|
|
|
u32 flags)
|
|
|
|
{
|
2017-01-12 18:00:49 +07:00
|
|
|
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
|
2016-06-10 15:52:59 +07:00
|
|
|
gen6_pte_t __iomem *pte =
|
2018-09-18 00:14:14 +07:00
|
|
|
(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
|
2016-06-10 15:52:59 +07:00
|
|
|
|
2016-10-13 19:02:40 +07:00
|
|
|
iowrite32(vm->pte_encode(addr, level, flags), pte);
|
2016-06-10 15:52:59 +07:00
|
|
|
|
2019-06-21 14:07:58 +07:00
|
|
|
ggtt->invalidate(ggtt);
|
2016-06-10 15:52:59 +07:00
|
|
|
}
|
|
|
|
|
2012-11-05 00:21:27 +07:00
|
|
|
/*
|
|
|
|
* Binds an object into the global gtt with the specified cache level. The object
|
|
|
|
* will be accessible to the GPU via commands whose operands reference offsets
|
|
|
|
* within the global GTT as well as accessible by the GPU through the GMADR
|
|
|
|
* mapped BAR (dev_priv->mm.gtt->gtt).
|
|
|
|
*/
|
2013-07-17 06:50:05 +07:00
|
|
|
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
|
2017-06-22 16:58:36 +07:00
|
|
|
struct i915_vma *vma,
|
2017-02-15 15:43:57 +07:00
|
|
|
enum i915_cache_level level,
|
|
|
|
u32 flags)
|
2012-11-05 00:21:27 +07:00
|
|
|
{
|
2016-04-28 15:56:38 +07:00
|
|
|
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
|
2017-02-15 15:43:36 +07:00
|
|
|
gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
|
2018-09-18 00:14:14 +07:00
|
|
|
unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
|
2017-02-15 15:43:36 +07:00
|
|
|
struct sgt_iter iter;
|
2016-05-20 17:54:06 +07:00
|
|
|
dma_addr_t addr;
|
2019-08-30 03:19:19 +07:00
|
|
|
for_each_sgt_daddr(addr, iter, vma->pages)
|
2017-02-15 15:43:36 +07:00
|
|
|
iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
|
2012-11-05 00:21:30 +07:00
|
|
|
|
2018-05-08 19:41:54 +07:00
|
|
|
/*
|
|
|
|
* We want to flush the TLBs only after we're certain all the PTE
|
|
|
|
* updates have finished.
|
2012-11-05 00:21:30 +07:00
|
|
|
*/
|
2019-06-21 14:07:58 +07:00
|
|
|
ggtt->invalidate(ggtt);
|
2012-11-05 00:21:27 +07:00
|
|
|
}
|
|
|
|
|
2016-05-14 13:26:35 +07:00
|
|
|
/*
 * clear_range implementation that intentionally does nothing: all three
 * arguments are ignored and no entries are touched.
 */
static void nop_clear_range(struct i915_address_space *vm,
			    u64 start, u64 length)
{
	/* deliberately empty */
}
|
|
|
|
|
2013-11-03 11:07:18 +07:00
|
|
|
/*
 * Point a range of gen8 GGTT entries at the scratch page.
 *
 * @vm:     the GGTT address space
 * @start:  byte offset of the first entry to clear
 * @length: size in bytes of the range to clear
 *
 * Every entry covering [@start, @start + @length) is overwritten with
 * vm->scratch[0].encode. If the range would run past the end of the GGTT
 * a warning is emitted and the range is clamped to the last entry.
 * No invalidate is issued here.
 */
static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0].encode;
	gen8_pte_t __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	/* Never write beyond the end of the table. */
	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	/* Each slot is addressed explicitly: &gtt_base[i]. */
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
}
|
|
|
|
|
2017-05-24 22:54:11 +07:00
|
|
|
static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv = vm->i915;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make sure the internal GAM fifo has been cleared of all GTT
|
|
|
|
* writes before exiting stop_machine(). This guarantees that
|
|
|
|
* any aperture accesses waiting to start in another process
|
|
|
|
* cannot back up behind the GTT writes causing a hang.
|
|
|
|
* The register can be any arbitrary GAM register.
|
|
|
|
*/
|
|
|
|
POSTING_READ(GFX_FLSH_CNTL_GEN6);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct insert_page {
|
|
|
|
struct i915_address_space *vm;
|
|
|
|
dma_addr_t addr;
|
|
|
|
u64 offset;
|
|
|
|
enum i915_cache_level level;
|
|
|
|
};
|
|
|
|
|
|
|
|
static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
|
|
|
|
{
|
|
|
|
struct insert_page *arg = _arg;
|
|
|
|
|
|
|
|
gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
|
|
|
|
bxt_vtd_ggtt_wa(arg->vm);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
|
|
|
|
dma_addr_t addr,
|
|
|
|
u64 offset,
|
|
|
|
enum i915_cache_level level,
|
|
|
|
u32 unused)
|
|
|
|
{
|
|
|
|
struct insert_page arg = { vm, addr, offset, level };
|
|
|
|
|
|
|
|
stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct insert_entries {
|
|
|
|
struct i915_address_space *vm;
|
2017-06-22 16:58:36 +07:00
|
|
|
struct i915_vma *vma;
|
2017-05-24 22:54:11 +07:00
|
|
|
enum i915_cache_level level;
|
2018-07-13 01:53:11 +07:00
|
|
|
u32 flags;
|
2017-05-24 22:54:11 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
|
|
|
|
{
|
|
|
|
struct insert_entries *arg = _arg;
|
|
|
|
|
2018-07-13 01:53:11 +07:00
|
|
|
gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
|
2017-05-24 22:54:11 +07:00
|
|
|
bxt_vtd_ggtt_wa(arg->vm);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
|
2017-06-22 16:58:36 +07:00
|
|
|
struct i915_vma *vma,
|
2017-05-24 22:54:11 +07:00
|
|
|
enum i915_cache_level level,
|
2018-07-13 01:53:11 +07:00
|
|
|
u32 flags)
|
2017-05-24 22:54:11 +07:00
|
|
|
{
|
2018-07-13 01:53:11 +07:00
|
|
|
struct insert_entries arg = { vm, vma, level, flags };
|
2017-05-24 22:54:11 +07:00
|
|
|
|
|
|
|
stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct clear_range {
|
|
|
|
struct i915_address_space *vm;
|
|
|
|
u64 start;
|
|
|
|
u64 length;
|
|
|
|
};
|
|
|
|
|
|
|
|
static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
|
|
|
|
{
|
|
|
|
struct clear_range *arg = _arg;
|
|
|
|
|
|
|
|
gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
|
|
|
|
bxt_vtd_ggtt_wa(arg->vm);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
|
|
|
|
u64 start,
|
|
|
|
u64 length)
|
|
|
|
{
|
|
|
|
struct clear_range arg = { vm, start, length };
|
|
|
|
|
|
|
|
stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
|
|
|
|
}
|
|
|
|
|
2013-07-17 06:50:05 +07:00
|
|
|
/*
 * Point a range of gen6 (32-bit) GGTT entries at the scratch page.
 *
 * @vm:     the GGTT address space
 * @start:  byte offset of the first entry to clear
 * @length: size in bytes of the range to clear
 *
 * Every entry covering [@start, @start + @length) is overwritten with
 * vm->scratch[0].encode. If the range would run past the end of the GGTT
 * a warning is emitted and the range is clamped to the last entry.
 * No invalidate is issued here.
 */
static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned num_entries = length / I915_GTT_PAGE_SIZE;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	/* Never write beyond the end of the table. */
	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->scratch[0].encode;

	/* Each slot is addressed explicitly: &gtt_base[i]. */
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
}
|
|
|
|
|
2016-06-10 15:52:59 +07:00
|
|
|
static void i915_ggtt_insert_page(struct i915_address_space *vm,
|
|
|
|
dma_addr_t addr,
|
2017-02-15 15:43:57 +07:00
|
|
|
u64 offset,
|
2016-06-10 15:52:59 +07:00
|
|
|
enum i915_cache_level cache_level,
|
|
|
|
u32 unused)
|
|
|
|
{
|
|
|
|
unsigned int flags = (cache_level == I915_CACHE_NONE) ?
|
|
|
|
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
|
|
|
|
|
|
|
|
intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
|
|
|
|
}
|
|
|
|
|
2015-04-14 22:35:25 +07:00
|
|
|
static void i915_ggtt_insert_entries(struct i915_address_space *vm,
|
2017-06-22 16:58:36 +07:00
|
|
|
struct i915_vma *vma,
|
2017-02-15 15:43:57 +07:00
|
|
|
enum i915_cache_level cache_level,
|
|
|
|
u32 unused)
|
2013-01-25 05:44:55 +07:00
|
|
|
{
|
|
|
|
unsigned int flags = (cache_level == I915_CACHE_NONE) ?
|
|
|
|
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
|
|
|
|
|
2017-06-22 16:58:36 +07:00
|
|
|
intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
|
|
|
|
flags);
|
2013-01-25 05:44:55 +07:00
|
|
|
}
|
|
|
|
|
2013-07-17 06:50:05 +07:00
|
|
|
/*
 * Clear a range through intel_gtt_clear_range(); @start and @length are
 * converted from bytes to page counts by shifting with PAGE_SHIFT.
 * @vm is not used.
 */
static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	intel_gtt_clear_range(start >> PAGE_SHIFT,
			      length >> PAGE_SHIFT);
}
|
|
|
|
|
2015-04-14 22:35:27 +07:00
|
|
|
static int ggtt_bind_vma(struct i915_vma *vma,
|
|
|
|
enum i915_cache_level cache_level,
|
|
|
|
u32 flags)
|
2015-10-15 19:23:01 +07:00
|
|
|
{
|
2016-11-29 16:50:08 +07:00
|
|
|
struct drm_i915_private *i915 = vma->vm->i915;
|
2015-10-15 19:23:01 +07:00
|
|
|
struct drm_i915_gem_object *obj = vma->obj;
|
2019-01-14 21:21:18 +07:00
|
|
|
intel_wakeref_t wakeref;
|
2017-02-15 15:43:35 +07:00
|
|
|
u32 pte_flags;
|
2015-10-15 19:23:01 +07:00
|
|
|
|
2018-07-13 01:53:11 +07:00
|
|
|
/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
|
2017-02-15 15:43:35 +07:00
|
|
|
pte_flags = 0;
|
2018-07-13 01:53:13 +07:00
|
|
|
if (i915_gem_object_is_readonly(obj))
|
2015-10-15 19:23:01 +07:00
|
|
|
pte_flags |= PTE_READ_ONLY;
|
|
|
|
|
2019-06-14 06:21:55 +07:00
|
|
|
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
|
2019-01-14 21:21:23 +07:00
|
|
|
vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
|
2015-10-15 19:23:01 +07:00
|
|
|
|
2017-10-07 05:18:27 +07:00
|
|
|
vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
|
|
|
|
|
2015-10-15 19:23:01 +07:00
|
|
|
/*
|
|
|
|
* Without aliasing PPGTT there's no difference between
|
|
|
|
* GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
|
|
|
|
* upgrade to both bound if we bind either to avoid double-binding.
|
|
|
|
*/
|
2019-09-11 16:02:43 +07:00
|
|
|
atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
|
2015-10-15 19:23:01 +07:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-02-15 15:43:39 +07:00
|
|
|
static void ggtt_unbind_vma(struct i915_vma *vma)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *i915 = vma->vm->i915;
|
2019-01-14 21:21:18 +07:00
|
|
|
intel_wakeref_t wakeref;
|
2017-02-15 15:43:39 +07:00
|
|
|
|
2019-06-14 06:21:55 +07:00
|
|
|
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
|
2019-01-14 21:21:23 +07:00
|
|
|
vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
|
2017-02-15 15:43:39 +07:00
|
|
|
}
|
|
|
|
|
2015-10-15 19:23:01 +07:00
|
|
|
static int aliasing_gtt_bind_vma(struct i915_vma *vma,
|
|
|
|
enum i915_cache_level cache_level,
|
|
|
|
u32 flags)
|
2011-04-14 12:48:26 +07:00
|
|
|
{
|
2016-11-29 16:50:08 +07:00
|
|
|
struct drm_i915_private *i915 = vma->vm->i915;
|
2015-11-20 17:27:18 +07:00
|
|
|
u32 pte_flags;
|
2017-02-15 15:43:42 +07:00
|
|
|
int ret;
|
2015-04-14 22:35:27 +07:00
|
|
|
|
2014-06-17 12:29:42 +07:00
|
|
|
/* Currently applicable only to VLV */
|
2015-11-20 17:27:18 +07:00
|
|
|
pte_flags = 0;
|
2018-07-13 01:53:13 +07:00
|
|
|
if (i915_gem_object_is_readonly(vma->obj))
|
2015-04-14 22:35:15 +07:00
|
|
|
pte_flags |= PTE_READ_ONLY;
|
2014-06-17 12:29:42 +07:00
|
|
|
|
2017-02-15 15:43:42 +07:00
|
|
|
if (flags & I915_VMA_LOCAL_BIND) {
|
2019-07-30 21:32:08 +07:00
|
|
|
struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
|
2017-02-15 15:43:42 +07:00
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
if (flags & I915_VMA_ALLOC) {
|
2019-07-30 21:32:08 +07:00
|
|
|
ret = alias->vm.allocate_va_range(&alias->vm,
|
|
|
|
vma->node.start,
|
|
|
|
vma->size);
|
2017-02-15 15:43:42 +07:00
|
|
|
if (ret)
|
2017-10-07 05:18:19 +07:00
|
|
|
return ret;
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
|
|
|
|
set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
|
2017-02-15 15:43:42 +07:00
|
|
|
}
|
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
|
|
|
|
__i915_vma_flags(vma)));
|
2019-07-30 21:32:08 +07:00
|
|
|
alias->vm.insert_entries(&alias->vm, vma,
|
|
|
|
cache_level, pte_flags);
|
2017-02-15 15:43:42 +07:00
|
|
|
}
|
|
|
|
|
2016-08-04 22:32:32 +07:00
|
|
|
if (flags & I915_VMA_GLOBAL_BIND) {
|
2019-01-14 21:21:18 +07:00
|
|
|
intel_wakeref_t wakeref;
|
|
|
|
|
2019-06-14 06:21:55 +07:00
|
|
|
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
|
2019-01-14 21:21:23 +07:00
|
|
|
vma->vm->insert_entries(vma->vm, vma,
|
|
|
|
cache_level, pte_flags);
|
|
|
|
}
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initialy, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 05:10:56 +07:00
|
|
|
}
|
2011-04-14 12:48:26 +07:00
|
|
|
|
2015-04-14 22:35:27 +07:00
|
|
|
return 0;
|
2011-04-14 12:48:26 +07:00
|
|
|
}
|
|
|
|
|
2017-02-15 15:43:39 +07:00
|
|
|
static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
|
2012-02-16 05:50:21 +07:00
|
|
|
{
|
2016-11-29 16:50:08 +07:00
|
|
|
struct drm_i915_private *i915 = vma->vm->i915;
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound Bos
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want, to remove clear_range, however it's
not totally easy at this point. Since it's used in a couple of places
still that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 05:10:56 +07:00
|
|
|
|
2019-09-11 16:02:43 +07:00
|
|
|
if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
|
2019-01-14 21:21:23 +07:00
|
|
|
struct i915_address_space *vm = vma->vm;
|
2019-01-14 21:21:18 +07:00
|
|
|
intel_wakeref_t wakeref;
|
|
|
|
|
2019-06-14 06:21:55 +07:00
|
|
|
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
|
2019-01-14 21:21:23 +07:00
|
|
|
vm->clear_range(vm, vma->node.start, vma->size);
|
2016-10-24 19:42:15 +07:00
|
|
|
}
|
2015-04-24 19:09:03 +07:00
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to allocate and apply the PTE updates after we have
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is owned by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
|
2019-07-30 21:32:08 +07:00
|
|
|
struct i915_address_space *vm =
|
|
|
|
&i915_vm_to_ggtt(vma->vm)->alias->vm;
|
2017-02-15 15:43:39 +07:00
|
|
|
|
|
|
|
vm->clear_range(vm, vma->node.start, vma->size);
|
|
|
|
}
|
2012-02-16 05:50:21 +07:00
|
|
|
}
|
|
|
|
|
2016-10-28 19:58:36 +07:00
|
|
|
void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
|
|
|
|
struct sg_table *pages)
|
2010-11-06 16:10:47 +07:00
|
|
|
{
|
2016-08-22 17:32:44 +07:00
|
|
|
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
|
|
|
|
struct device *kdev = &dev_priv->drm.pdev->dev;
|
2016-08-05 16:14:12 +07:00
|
|
|
struct i915_ggtt *ggtt = &dev_priv->ggtt;
|
2011-10-18 05:51:55 +07:00
|
|
|
|
2016-08-05 16:14:12 +07:00
|
|
|
if (unlikely(ggtt->do_idle_maps)) {
|
2019-10-04 20:40:04 +07:00
|
|
|
/* XXX This does not prevent more requests being submitted! */
|
2019-10-04 20:40:06 +07:00
|
|
|
if (intel_gt_retire_requests_timeout(ggtt->vm.gt,
|
|
|
|
-MAX_SCHEDULE_TIMEOUT)) {
|
2016-08-05 16:14:12 +07:00
|
|
|
DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
|
|
|
|
/* Wait a bit, in hopes it avoids the hang */
|
|
|
|
udelay(10);
|
|
|
|
}
|
|
|
|
}
|
2011-10-18 05:51:55 +07:00
|
|
|
|
2016-10-28 19:58:36 +07:00
|
|
|
dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
|
2010-11-06 16:10:47 +07:00
|
|
|
}
|
2012-03-26 14:45:40 +07:00
|
|
|
|
2017-10-07 05:18:19 +07:00
|
|
|
static int ggtt_set_pages(struct i915_vma *vma)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
GEM_BUG_ON(vma->pages);
|
|
|
|
|
|
|
|
ret = i915_get_ggtt_vma_pages(vma);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2017-10-07 05:18:20 +07:00
|
|
|
vma->page_sizes = vma->obj->mm.page_sizes;
|
|
|
|
|
2017-10-07 05:18:19 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-09-09 19:40:51 +07:00
|
|
|
static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
|
|
|
|
unsigned long color,
|
|
|
|
u64 *start,
|
|
|
|
u64 *end)
|
2012-07-26 17:49:32 +07:00
|
|
|
{
|
2019-09-09 19:40:50 +07:00
|
|
|
if (i915_node_color_differs(node, color))
|
2017-01-10 21:47:34 +07:00
|
|
|
*start += I915_GTT_PAGE_SIZE;
|
2012-07-26 17:49:32 +07:00
|
|
|
|
2017-02-06 15:45:47 +07:00
|
|
|
/* Also leave a space between the unallocated reserved node after the
|
|
|
|
* GTT and any objects within the GTT, i.e. we use the color adjustment
|
|
|
|
* to insert a guard page to prevent prefetches crossing over the
|
|
|
|
* GTT boundary.
|
|
|
|
*/
|
2016-12-16 14:46:40 +07:00
|
|
|
node = list_next_entry(node, node_list);
|
2017-02-06 15:45:47 +07:00
|
|
|
if (node->color != color)
|
2017-01-10 21:47:34 +07:00
|
|
|
*end -= I915_GTT_PAGE_SIZE;
|
2012-07-26 17:49:32 +07:00
|
|
|
}
|
2013-11-05 10:56:49 +07:00
|
|
|
|
2019-07-30 21:32:08 +07:00
|
|
|
static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
|
2017-02-14 00:15:50 +07:00
|
|
|
{
|
2019-06-11 16:12:38 +07:00
|
|
|
struct i915_ppgtt *ppgtt;
|
2017-02-14 00:15:50 +07:00
|
|
|
int err;
|
|
|
|
|
2019-07-30 21:32:08 +07:00
|
|
|
ppgtt = i915_ppgtt_create(ggtt->vm.i915);
|
2017-02-15 15:43:38 +07:00
|
|
|
if (IS_ERR(ppgtt))
|
|
|
|
return PTR_ERR(ppgtt);
|
2017-02-14 00:15:50 +07:00
|
|
|
|
2018-06-15 01:42:18 +07:00
|
|
|
if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
|
2017-02-15 15:43:55 +07:00
|
|
|
err = -ENODEV;
|
|
|
|
goto err_ppgtt;
|
|
|
|
}
|
|
|
|
|
2018-06-14 20:43:14 +07:00
|
|
|
/*
|
|
|
|
* Note we only pre-allocate as far as the end of the global
|
|
|
|
* GTT. On 48b / 4-level page-tables, the difference is very,
|
|
|
|
* very significant! We have to preallocate as GVT/vgpu does
|
|
|
|
* not like the page directory disappearing.
|
|
|
|
*/
|
|
|
|
err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
|
|
|
|
if (err)
|
|
|
|
goto err_ppgtt;
|
2017-02-14 00:15:50 +07:00
|
|
|
|
2019-07-30 21:32:08 +07:00
|
|
|
ggtt->alias = ppgtt;
|
2019-10-04 20:39:57 +07:00
|
|
|
ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
|
2017-02-15 15:43:39 +07:00
|
|
|
|
2018-06-07 22:40:46 +07:00
|
|
|
GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
|
|
|
|
ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
|
2017-02-14 00:15:50 +07:00
|
|
|
|
2018-06-07 22:40:46 +07:00
|
|
|
GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
|
|
|
|
ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
|
2017-02-15 15:43:39 +07:00
|
|
|
|
2019-09-02 11:02:43 +07:00
|
|
|
ppgtt->vm.total = ggtt->vm.total;
|
|
|
|
|
2017-02-14 00:15:50 +07:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
err_ppgtt:
|
2019-06-11 16:12:37 +07:00
|
|
|
i915_vm_put(&ppgtt->vm);
|
2017-02-14 00:15:50 +07:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2019-07-30 21:32:08 +07:00
|
|
|
static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
|
2017-02-14 00:15:50 +07:00
|
|
|
{
|
2019-06-11 16:12:38 +07:00
|
|
|
struct i915_ppgtt *ppgtt;
|
2017-02-14 00:15:50 +07:00
|
|
|
|
2019-07-30 21:32:08 +07:00
|
|
|
ppgtt = fetch_and_zero(&ggtt->alias);
|
2017-02-14 00:15:50 +07:00
|
|
|
if (!ppgtt)
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to allocate and apply the PTE updates after we have
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is owned by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
return;
|
2017-02-14 00:15:50 +07:00
|
|
|
|
2019-06-11 16:12:37 +07:00
|
|
|
i915_vm_put(&ppgtt->vm);
|
2017-02-14 00:15:50 +07:00
|
|
|
|
2018-06-07 22:40:46 +07:00
|
|
|
ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
|
|
|
|
ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
|
2017-02-14 00:15:50 +07:00
|
|
|
}
|
|
|
|
|
2019-06-11 19:23:50 +07:00
|
|
|
static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
|
|
|
|
{
|
|
|
|
u64 size;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!USES_GUC(ggtt->vm.i915))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
|
|
|
|
size = ggtt->vm.total - GUC_GGTT_TOP;
|
|
|
|
|
|
|
|
ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
|
|
|
|
GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
|
|
|
|
PIN_NOEVICT);
|
|
|
|
if (ret)
|
|
|
|
DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
|
|
|
|
{
|
|
|
|
if (drm_mm_node_allocated(&ggtt->uc_fw))
|
|
|
|
drm_mm_remove_node(&ggtt->uc_fw);
|
|
|
|
}
|
|
|
|
|
2019-06-21 14:08:05 +07:00
|
|
|
static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
|
|
|
|
{
|
|
|
|
ggtt_release_guc_top(ggtt);
|
|
|
|
drm_mm_remove_node(&ggtt->error_capture);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int init_ggtt(struct i915_ggtt *ggtt)
|
2012-03-26 14:45:40 +07:00
|
|
|
{
|
2013-01-26 07:41:04 +07:00
|
|
|
/* Let GEM Manage all of the aperture.
|
|
|
|
*
|
|
|
|
* However, leave one page at the end still bound to the scratch page.
|
|
|
|
* There are a number of places where the hardware apparently prefetches
|
|
|
|
* past the end of the object, and we've seen multiple hangs with the
|
|
|
|
* GPU head pointer stuck in a batchbuffer bound at the last page of the
|
|
|
|
* aperture. One page should be enough to keep any prefetching inside
|
|
|
|
* of the aperture.
|
|
|
|
*/
|
2012-11-15 18:32:19 +07:00
|
|
|
unsigned long hole_start, hole_end;
|
2016-08-04 13:52:23 +07:00
|
|
|
struct drm_mm_node *entry;
|
2014-08-07 01:19:54 +07:00
|
|
|
int ret;
|
2012-03-26 14:45:40 +07:00
|
|
|
|
2018-07-27 21:11:45 +07:00
|
|
|
/*
|
|
|
|
* GuC requires all resources that we're sharing with it to be placed in
|
|
|
|
* non-WOPCM memory. If GuC is not present or not in use we still need a
|
|
|
|
* small bias as ring wraparound at offset 0 sometimes hangs. No idea
|
|
|
|
* why.
|
|
|
|
*/
|
|
|
|
ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
|
2019-06-21 14:08:05 +07:00
|
|
|
intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));
|
2018-07-27 21:11:45 +07:00
|
|
|
|
2019-06-21 14:07:39 +07:00
|
|
|
ret = intel_vgt_balloon(ggtt);
|
2016-06-16 19:06:59 +07:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
2015-02-10 18:05:48 +07:00
|
|
|
|
2016-10-12 16:05:20 +07:00
|
|
|
/* Reserve a mappable slot for our lockless error capture */
|
2018-06-05 22:37:58 +07:00
|
|
|
ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture,
|
2017-02-03 04:04:38 +07:00
|
|
|
PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
|
|
|
|
0, ggtt->mappable_end,
|
|
|
|
DRM_MM_INSERT_LOW);
|
2016-10-12 16:05:20 +07:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2019-06-11 19:23:50 +07:00
|
|
|
/*
|
|
|
|
* The upper portion of the GuC address space has a sizeable hole
|
|
|
|
* (several MB) that is inaccessible by GuC. Reserve this range within
|
|
|
|
* GGTT as it can comfortably hold GuC/HuC firmware images.
|
|
|
|
*/
|
|
|
|
ret = ggtt_reserve_guc_top(ggtt);
|
|
|
|
if (ret)
|
2019-06-21 14:08:05 +07:00
|
|
|
goto err;
|
2019-04-20 06:00:12 +07:00
|
|
|
|
2012-11-15 18:32:19 +07:00
|
|
|
/* Clear any non-preallocated blocks */
|
2018-06-05 22:37:58 +07:00
|
|
|
drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
|
2012-11-15 18:32:19 +07:00
|
|
|
DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
|
|
|
|
hole_start, hole_end);
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.clear_range(&ggtt->vm, hole_start,
|
|
|
|
hole_end - hole_start);
|
2012-11-15 18:32:19 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/* And finally clear the reserved guard page */
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
|
2014-08-06 20:04:50 +07:00
|
|
|
|
2019-06-21 14:08:05 +07:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
err:
|
|
|
|
cleanup_init_ggtt(ggtt);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int i915_init_ggtt(struct drm_i915_private *i915)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = init_ggtt(&i915->ggtt);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
|
2019-07-30 21:32:08 +07:00
|
|
|
ret = init_aliasing_ppgtt(&i915->ggtt);
|
2016-10-12 16:05:20 +07:00
|
|
|
if (ret)
|
2019-06-21 14:08:05 +07:00
|
|
|
cleanup_init_ggtt(&i915->ggtt);
|
2014-08-07 01:19:54 +07:00
|
|
|
}
|
|
|
|
|
2014-08-06 20:04:50 +07:00
|
|
|
return 0;
|
2012-11-05 00:21:27 +07:00
|
|
|
}
|
|
|
|
|
2019-06-21 14:08:04 +07:00
|
|
|
static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
|
2014-08-06 20:04:56 +07:00
|
|
|
{
|
2017-02-10 23:35:22 +07:00
|
|
|
struct i915_vma *vma, *vn;
|
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to allocate and apply the PTE updates after we have
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is owned by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
atomic_set(&ggtt->vm.open, 0);
|
2017-02-10 23:35:22 +07:00
|
|
|
|
2019-07-29 20:24:12 +07:00
|
|
|
rcu_barrier(); /* flush the RCU'ed__i915_vm_release */
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to allocate and apply the PTE updates after we have
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is owned by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
flush_workqueue(ggtt->vm.i915->wq);
|
2019-07-29 20:24:12 +07:00
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to allocate and apply the PTE updates after we have
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is owned by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
mutex_lock(&ggtt->vm.mutex);
|
2018-06-09 16:01:51 +07:00
|
|
|
|
2019-01-28 17:23:52 +07:00
|
|
|
list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to allocate and apply the PTE updates after we have
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is owned by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
WARN_ON(__i915_vma_unbind(vma));
|
2017-02-15 15:43:38 +07:00
|
|
|
|
2016-10-12 16:05:20 +07:00
|
|
|
if (drm_mm_node_allocated(&ggtt->error_capture))
|
|
|
|
drm_mm_remove_node(&ggtt->error_capture);
|
|
|
|
|
2019-06-11 19:23:50 +07:00
|
|
|
ggtt_release_guc_top(ggtt);
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to allocate and apply the PTE updates after we have
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is owned by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
intel_vgt_deballoon(ggtt);
|
2014-08-06 20:04:56 +07:00
|
|
|
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.cleanup(&ggtt->vm);
|
2017-08-23 00:38:28 +07:00
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to allocate and apply the PTE updates after we have
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is owned by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
mutex_unlock(&ggtt->vm.mutex);
|
|
|
|
i915_address_space_fini(&ggtt->vm);
|
2019-06-21 14:08:04 +07:00
|
|
|
|
|
|
|
arch_phys_wc_del(ggtt->mtrr);
|
|
|
|
io_mapping_fini(&ggtt->iomap);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2019-07-12 18:24:28 +07:00
|
|
|
* i915_ggtt_driver_release - Clean up GGTT hardware initialization
|
2019-06-21 20:16:40 +07:00
|
|
|
* @i915: i915 device
|
2019-06-21 14:08:04 +07:00
|
|
|
*/
|
2019-07-12 18:24:28 +07:00
|
|
|
void i915_ggtt_driver_release(struct drm_i915_private *i915)
|
2019-06-21 14:08:04 +07:00
|
|
|
{
|
|
|
|
struct pagevec *pvec;
|
|
|
|
|
2019-07-30 21:32:08 +07:00
|
|
|
fini_aliasing_ppgtt(&i915->ggtt);
|
2019-06-21 14:08:04 +07:00
|
|
|
|
|
|
|
ggtt_cleanup_hw(&i915->ggtt);
|
|
|
|
|
|
|
|
pvec = &i915->mm.wc_stash.pvec;
|
2017-08-23 00:38:28 +07:00
|
|
|
if (pvec->nr) {
|
|
|
|
set_pages_array_wb(pvec->pages, pvec->nr);
|
|
|
|
__pagevec_release(pvec);
|
|
|
|
}
|
|
|
|
|
2019-06-21 14:08:04 +07:00
|
|
|
i915_gem_cleanup_stolen(i915);
|
2014-08-06 20:04:56 +07:00
|
|
|
}
|
2014-08-06 20:04:57 +07:00
|
|
|
|
2015-04-14 22:35:26 +07:00
|
|
|
/*
 * Extract the GGMS field from the SNB GMCH control word and scale it by
 * 1 << 20 to obtain the returned size.
 */
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	const u16 ggms =
		(snb_gmch_ctl >> SNB_GMCH_GGMS_SHIFT) & SNB_GMCH_GGMS_MASK;

	return ggms << 20;
}
|
|
|
|
|
2015-04-14 22:35:26 +07:00
|
|
|
/*
 * Decode the GGMS field of the BDW GMCH control word. A non-zero field
 * value n is turned into 1 << n before the result is scaled by 1 << 20;
 * a zero field yields a size of 0.
 */
static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	u16 mbytes;

	mbytes = (bdw_gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK;
	if (mbytes != 0)
		mbytes = 1 << mbytes;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
	if (mbytes > 4)
		mbytes = 4;
#endif

	return mbytes << 20;
}
|
|
|
|
|
2015-04-14 22:35:26 +07:00
|
|
|
/*
 * Decode the GGMS field (same shift/mask as SNB) of the CHV GMCH control
 * word: a non-zero field value n gives 1 << (20 + n), zero gives 0.
 */
static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	const u16 ggms =
		(gmch_ctrl >> SNB_GMCH_GGMS_SHIFT) & SNB_GMCH_GGMS_MASK;

	if (!ggms)
		return 0;

	return 1 << (20 + ggms);
}
|
|
|
|
|
2016-08-04 13:52:24 +07:00
|
|
|
/*
 * Map the GGTT page-table region that lives in the upper half of BAR 0 and
 * set up the scratch page for the GGTT address space.
 *
 * @ggtt: GGTT being probed; ggtt->vm.pte_encode must already be set
 * @size: number of bytes of page table to map
 *
 * Returns 0 on success, -ENOMEM if the mapping could not be created, or the
 * error reported by setup_scratch_page().
 */
static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
	struct i915_address_space *vm = &ggtt->vm;
	struct drm_i915_private *i915 = vm->i915;
	struct pci_dev *pdev = i915->drm.pdev;
	phys_addr_t gsm_base;
	bool wants_uc;
	int err;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	gsm_base = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;

	/*
	 * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
	 * will be dropped. For WC mappings in general we have 64 byte burst
	 * writes when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
	wants_uc = IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10;
	ggtt->gsm = wants_uc ? ioremap_nocache(gsm_base, size) :
			       ioremap_wc(gsm_base, size);
	if (!ggtt->gsm) {
		DRM_ERROR("Failed to map the ggtt page table\n");
		return -ENOMEM;
	}

	err = setup_scratch_page(vm, GFP_DMA32);
	if (err) {
		DRM_ERROR("Scratch setup failed\n");
		goto err_unmap;
	}

	/* Pre-compute the PTE value that points at the scratch page. */
	vm->scratch[0].encode = vm->pte_encode(px_dma(&vm->scratch[0]),
					       I915_CACHE_NONE, 0);

	return 0;

err_unmap:
	/* iounmap will also get called at remove, but meh */
	iounmap(ggtt->gsm);
	return err;
}
|
|
|
|
|
2019-08-17 16:38:54 +07:00
|
|
|
static void tgl_setup_private_ppat(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
|
|
|
/* TGL doesn't support LLC or AGE settings */
|
|
|
|
I915_WRITE(GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
|
|
|
|
I915_WRITE(GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
|
|
|
|
I915_WRITE(GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
|
|
|
|
I915_WRITE(GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
|
|
|
|
I915_WRITE(GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
|
|
|
|
I915_WRITE(GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
|
|
|
|
I915_WRITE(GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
|
|
|
|
I915_WRITE(GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
|
|
|
|
}
|
|
|
|
|
2019-07-02 18:31:48 +07:00
|
|
|
static void cnl_setup_private_ppat(struct drm_i915_private *dev_priv)
|
2017-08-16 06:25:39 +07:00
|
|
|
{
|
2019-07-02 18:31:48 +07:00
|
|
|
I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_WB | GEN8_PPAT_LLC);
|
|
|
|
I915_WRITE(GEN10_PAT_INDEX(1), GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
|
|
|
|
I915_WRITE(GEN10_PAT_INDEX(2), GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
|
|
|
|
I915_WRITE(GEN10_PAT_INDEX(3), GEN8_PPAT_UC);
|
|
|
|
I915_WRITE(GEN10_PAT_INDEX(4), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
|
|
|
|
I915_WRITE(GEN10_PAT_INDEX(5), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
|
|
|
|
I915_WRITE(GEN10_PAT_INDEX(6), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
|
|
|
|
I915_WRITE(GEN10_PAT_INDEX(7), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
|
2017-08-16 06:25:39 +07:00
|
|
|
}
|
|
|
|
|
2013-11-05 10:56:49 +07:00
|
|
|
/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
|
|
|
|
* bits. When using advanced contexts each context stores its own PAT, but
|
|
|
|
* writing this data shouldn't be harmful even in those cases. */
|
2019-07-02 18:31:48 +07:00
|
|
|
static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
|
2013-11-05 10:56:49 +07:00
|
|
|
{
|
2019-07-02 18:31:48 +07:00
|
|
|
u64 pat;
|
2013-11-05 10:56:49 +07:00
|
|
|
|
2019-07-02 18:31:49 +07:00
|
|
|
pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
|
|
|
|
GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
|
|
|
|
GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
|
|
|
|
GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
|
|
|
|
GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
|
|
|
|
GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
|
|
|
|
GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
|
|
|
|
GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
|
2014-11-06 07:56:36 +07:00
|
|
|
|
2019-07-02 18:31:48 +07:00
|
|
|
I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
|
|
|
|
I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
|
2013-11-05 10:56:49 +07:00
|
|
|
}
|
|
|
|
|
2019-07-02 18:31:48 +07:00
|
|
|
static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
|
2014-04-09 17:28:01 +07:00
|
|
|
{
|
2019-07-02 18:31:48 +07:00
|
|
|
u64 pat;
|
2014-04-09 17:28:01 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Map WB on BDW to snooped on CHV.
|
|
|
|
*
|
|
|
|
* Only the snoop bit has meaning for CHV, the rest is
|
|
|
|
* ignored.
|
|
|
|
*
|
2014-11-15 02:02:44 +07:00
|
|
|
* The hardware will never snoop for certain types of accesses:
|
|
|
|
* - CPU GTT (GMADR->GGTT->no snoop->memory)
|
|
|
|
* - PPGTT page tables
|
|
|
|
* - some other special cycles
|
|
|
|
*
|
|
|
|
* As with BDW, we also need to consider the following for GT accesses:
|
|
|
|
* "For GGTT, there is NO pat_sel[2:0] from the entry,
|
|
|
|
* so RTL will always use the value corresponding to
|
|
|
|
* pat_sel = 000".
|
|
|
|
* Which means we must set the snoop bit in PAT entry 0
|
|
|
|
* in order to keep the global status page working.
|
2014-04-09 17:28:01 +07:00
|
|
|
*/
|
|
|
|
|
2019-07-02 18:31:48 +07:00
|
|
|
pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
|
|
|
|
GEN8_PPAT(1, 0) |
|
|
|
|
GEN8_PPAT(2, 0) |
|
|
|
|
GEN8_PPAT(3, 0) |
|
|
|
|
GEN8_PPAT(4, CHV_PPAT_SNOOP) |
|
|
|
|
GEN8_PPAT(5, CHV_PPAT_SNOOP) |
|
|
|
|
GEN8_PPAT(6, CHV_PPAT_SNOOP) |
|
|
|
|
GEN8_PPAT(7, CHV_PPAT_SNOOP);
|
|
|
|
|
|
|
|
I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
|
|
|
|
I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
|
2014-04-09 17:28:01 +07:00
|
|
|
}
|
|
|
|
|
2016-08-04 13:52:24 +07:00
|
|
|
static void gen6_gmch_remove(struct i915_address_space *vm)
|
|
|
|
{
|
|
|
|
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
|
|
|
|
|
|
|
|
iounmap(ggtt->gsm);
|
2017-02-15 15:43:40 +07:00
|
|
|
cleanup_scratch_page(vm);
|
2016-08-04 13:52:24 +07:00
|
|
|
}
|
|
|
|
|
2017-09-12 14:42:24 +07:00
|
|
|
/*
 * Pick the platform-specific private PAT programming routine. Only valid
 * for gen8 and later (asserted by GEM_BUG_ON).
 */
static void setup_private_pat(struct drm_i915_private *dev_priv)
{
	GEM_BUG_ON(INTEL_GEN(dev_priv) < 8);

	if (INTEL_GEN(dev_priv) >= 12) {
		tgl_setup_private_ppat(dev_priv);
		return;
	}

	if (INTEL_GEN(dev_priv) >= 10) {
		cnl_setup_private_ppat(dev_priv);
		return;
	}

	/* Gen9 LP parts share the CHV programming. */
	if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) {
		chv_setup_private_ppat(dev_priv);
		return;
	}

	bdw_setup_private_ppat(dev_priv);
}
|
|
|
|
|
2016-03-18 15:42:58 +07:00
|
|
|
static int gen8_gmch_probe(struct i915_ggtt *ggtt)
|
2013-11-05 10:32:22 +07:00
|
|
|
{
|
2018-06-05 22:37:58 +07:00
|
|
|
struct drm_i915_private *dev_priv = ggtt->vm.i915;
|
2016-08-04 13:52:22 +07:00
|
|
|
struct pci_dev *pdev = dev_priv->drm.pdev;
|
2016-08-04 13:52:24 +07:00
|
|
|
unsigned int size;
|
2013-11-05 10:32:22 +07:00
|
|
|
u16 snb_gmch_ctl;
|
2017-05-10 16:21:50 +07:00
|
|
|
int err;
|
2013-11-05 10:32:22 +07:00
|
|
|
|
|
|
|
/* TODO: We're not aware of mappable constraints on gen8 yet */
|
2017-12-11 22:18:20 +07:00
|
|
|
ggtt->gmadr =
|
|
|
|
(struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
|
|
|
|
pci_resource_len(pdev, 2));
|
|
|
|
ggtt->mappable_end = resource_size(&ggtt->gmadr);
|
2013-11-05 10:32:22 +07:00
|
|
|
|
2017-05-10 16:21:50 +07:00
|
|
|
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
|
|
|
|
if (!err)
|
|
|
|
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
|
|
|
|
if (err)
|
|
|
|
DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
|
2013-11-05 10:32:22 +07:00
|
|
|
|
2016-08-04 13:52:22 +07:00
|
|
|
pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
|
2018-05-04 04:29:56 +07:00
|
|
|
if (IS_CHERRYVIEW(dev_priv))
|
2016-08-04 13:52:24 +07:00
|
|
|
size = chv_get_total_gtt_size(snb_gmch_ctl);
|
2018-05-04 04:29:56 +07:00
|
|
|
else
|
2016-08-04 13:52:24 +07:00
|
|
|
size = gen8_get_total_gtt_size(snb_gmch_ctl);
|
2013-11-05 10:32:22 +07:00
|
|
|
|
2018-09-18 00:14:14 +07:00
|
|
|
ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.cleanup = gen6_gmch_remove;
|
|
|
|
ggtt->vm.insert_page = gen8_ggtt_insert_page;
|
|
|
|
ggtt->vm.clear_range = nop_clear_range;
|
2018-09-27 03:12:22 +07:00
|
|
|
if (intel_scanout_needs_vtd_wa(dev_priv))
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.clear_range = gen8_ggtt_clear_range;
|
2016-05-14 13:26:35 +07:00
|
|
|
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
|
2016-05-14 13:26:35 +07:00
|
|
|
|
2017-05-24 22:54:11 +07:00
|
|
|
/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
|
2019-01-15 04:17:27 +07:00
|
|
|
if (intel_ggtt_update_needs_vtd_wa(dev_priv) ||
|
|
|
|
IS_CHERRYVIEW(dev_priv) /* fails with concurrent use/update */) {
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
|
|
|
|
ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
|
|
|
|
if (ggtt->vm.clear_range != nop_clear_range)
|
|
|
|
ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
|
2017-05-24 22:54:11 +07:00
|
|
|
}
|
|
|
|
|
2017-01-12 18:00:49 +07:00
|
|
|
ggtt->invalidate = gen6_ggtt_invalidate;
|
|
|
|
|
2018-06-07 22:40:46 +07:00
|
|
|
ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
|
|
|
|
ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
|
|
|
|
ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
|
|
|
|
ggtt->vm.vma_ops.clear_pages = clear_pages;
|
|
|
|
|
2018-10-30 01:27:20 +07:00
|
|
|
ggtt->vm.pte_encode = gen8_pte_encode;
|
|
|
|
|
2017-09-12 14:42:24 +07:00
|
|
|
setup_private_pat(dev_priv);
|
|
|
|
|
2016-08-04 13:52:24 +07:00
|
|
|
return ggtt_probe_common(ggtt, size);
|
2013-11-05 10:32:22 +07:00
|
|
|
}
|
|
|
|
|
2016-03-18 15:42:58 +07:00
|
|
|
static int gen6_gmch_probe(struct i915_ggtt *ggtt)
|
2012-11-05 00:21:27 +07:00
|
|
|
{
|
2018-06-05 22:37:58 +07:00
|
|
|
struct drm_i915_private *dev_priv = ggtt->vm.i915;
|
2016-08-04 13:52:22 +07:00
|
|
|
struct pci_dev *pdev = dev_priv->drm.pdev;
|
2016-08-04 13:52:24 +07:00
|
|
|
unsigned int size;
|
2012-11-05 00:21:27 +07:00
|
|
|
u16 snb_gmch_ctl;
|
2017-05-10 16:21:50 +07:00
|
|
|
int err;
|
2012-11-05 00:21:27 +07:00
|
|
|
|
2017-12-11 22:18:20 +07:00
|
|
|
ggtt->gmadr =
|
|
|
|
(struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
|
|
|
|
pci_resource_len(pdev, 2));
|
|
|
|
ggtt->mappable_end = resource_size(&ggtt->gmadr);
|
2013-02-09 02:32:47 +07:00
|
|
|
|
2013-01-25 04:49:57 +07:00
|
|
|
/* 64/512MB is the current min/max we actually know of, but this is just
|
|
|
|
* a coarse sanity check.
|
2012-11-05 00:21:27 +07:00
|
|
|
*/
|
2016-08-04 13:52:24 +07:00
|
|
|
if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
|
2017-12-11 22:18:22 +07:00
|
|
|
DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
|
2013-01-25 04:49:57 +07:00
|
|
|
return -ENXIO;
|
2012-11-05 00:21:27 +07:00
|
|
|
}
|
|
|
|
|
2017-05-10 16:21:50 +07:00
|
|
|
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
|
|
|
|
if (!err)
|
|
|
|
err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
|
|
|
|
if (err)
|
|
|
|
DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
|
2016-08-04 13:52:22 +07:00
|
|
|
pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
|
2012-11-05 00:21:27 +07:00
|
|
|
|
2016-08-04 13:52:24 +07:00
|
|
|
size = gen6_get_total_gtt_size(snb_gmch_ctl);
|
2018-09-18 00:14:14 +07:00
|
|
|
ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
|
2012-11-05 00:21:27 +07:00
|
|
|
|
2019-04-20 03:12:07 +07:00
|
|
|
ggtt->vm.clear_range = nop_clear_range;
|
|
|
|
if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
|
|
|
|
ggtt->vm.clear_range = gen6_ggtt_clear_range;
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.insert_page = gen6_ggtt_insert_page;
|
|
|
|
ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
|
|
|
|
ggtt->vm.cleanup = gen6_gmch_remove;
|
2016-08-04 13:52:24 +07:00
|
|
|
|
2017-01-12 18:00:49 +07:00
|
|
|
ggtt->invalidate = gen6_ggtt_invalidate;
|
|
|
|
|
2016-08-04 13:52:24 +07:00
|
|
|
if (HAS_EDRAM(dev_priv))
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.pte_encode = iris_pte_encode;
|
2016-08-04 13:52:24 +07:00
|
|
|
else if (IS_HASWELL(dev_priv))
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.pte_encode = hsw_pte_encode;
|
2016-08-04 13:52:24 +07:00
|
|
|
else if (IS_VALLEYVIEW(dev_priv))
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.pte_encode = byt_pte_encode;
|
2016-08-04 13:52:24 +07:00
|
|
|
else if (INTEL_GEN(dev_priv) >= 7)
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.pte_encode = ivb_pte_encode;
|
2016-08-04 13:52:24 +07:00
|
|
|
else
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.pte_encode = snb_pte_encode;
|
2013-01-25 05:44:55 +07:00
|
|
|
|
2018-06-07 22:40:46 +07:00
|
|
|
ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
|
|
|
|
ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
|
|
|
|
ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
|
|
|
|
ggtt->vm.vma_ops.clear_pages = clear_pages;
|
|
|
|
|
2016-08-04 13:52:24 +07:00
|
|
|
return ggtt_probe_common(ggtt, size);
|
2012-11-05 00:21:27 +07:00
|
|
|
}
|
|
|
|
|
2016-08-04 13:52:24 +07:00
|
|
|
/*
 * vm->cleanup hook installed by i915_gmch_probe(); @vm itself is not used,
 * the teardown is delegated entirely to intel_gmch_remove().
 */
static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}
|
2013-01-25 04:49:57 +07:00
|
|
|
|
2016-03-18 15:42:58 +07:00
|
|
|
static int i915_gmch_probe(struct i915_ggtt *ggtt)
|
2013-01-25 04:49:57 +07:00
|
|
|
{
|
2018-06-05 22:37:58 +07:00
|
|
|
struct drm_i915_private *dev_priv = ggtt->vm.i915;
|
2017-12-11 22:18:20 +07:00
|
|
|
phys_addr_t gmadr_base;
|
2013-01-25 04:49:57 +07:00
|
|
|
int ret;
|
|
|
|
|
2016-07-05 16:40:23 +07:00
|
|
|
ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
|
2013-01-25 04:49:57 +07:00
|
|
|
if (!ret) {
|
|
|
|
DRM_ERROR("failed to set up gmch\n");
|
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
|
2018-06-05 22:37:58 +07:00
|
|
|
intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
|
2013-01-25 04:49:57 +07:00
|
|
|
|
2017-12-11 22:18:20 +07:00
|
|
|
ggtt->gmadr =
|
|
|
|
(struct resource) DEFINE_RES_MEM(gmadr_base,
|
|
|
|
ggtt->mappable_end);
|
|
|
|
|
2016-08-04 13:52:22 +07:00
|
|
|
ggtt->do_idle_maps = needs_idle_maps(dev_priv);
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.insert_page = i915_ggtt_insert_page;
|
|
|
|
ggtt->vm.insert_entries = i915_ggtt_insert_entries;
|
|
|
|
ggtt->vm.clear_range = i915_ggtt_clear_range;
|
|
|
|
ggtt->vm.cleanup = i915_gmch_remove;
|
2013-01-25 04:49:57 +07:00
|
|
|
|
2017-01-12 18:00:49 +07:00
|
|
|
ggtt->invalidate = gmch_ggtt_invalidate;
|
|
|
|
|
2018-06-07 22:40:46 +07:00
|
|
|
ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
|
|
|
|
ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
|
|
|
|
ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
|
|
|
|
ggtt->vm.vma_ops.clear_pages = clear_pages;
|
|
|
|
|
2016-03-18 15:42:58 +07:00
|
|
|
if (unlikely(ggtt->do_idle_maps))
|
2019-08-15 16:36:04 +07:00
|
|
|
dev_notice(dev_priv->drm.dev,
|
|
|
|
"Applying Ironlake quirks for intel_iommu\n");
|
2013-12-30 19:16:15 +07:00
|
|
|
|
2013-01-25 04:49:57 +07:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-06-21 14:07:59 +07:00
|
|
|
static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
|
2013-01-25 04:49:57 +07:00
|
|
|
{
|
2019-06-21 14:07:59 +07:00
|
|
|
struct drm_i915_private *i915 = gt->i915;
|
2013-01-25 04:49:57 +07:00
|
|
|
int ret;
|
|
|
|
|
2019-06-21 14:07:59 +07:00
|
|
|
ggtt->vm.gt = gt;
|
2019-06-21 14:07:56 +07:00
|
|
|
ggtt->vm.i915 = i915;
|
|
|
|
ggtt->vm.dma = &i915->drm.pdev->dev;
|
2015-06-25 22:35:13 +07:00
|
|
|
|
2019-06-21 14:07:56 +07:00
|
|
|
if (INTEL_GEN(i915) <= 5)
|
2016-08-04 13:52:24 +07:00
|
|
|
ret = i915_gmch_probe(ggtt);
|
2019-06-21 14:07:56 +07:00
|
|
|
else if (INTEL_GEN(i915) < 8)
|
2016-08-04 13:52:24 +07:00
|
|
|
ret = gen6_gmch_probe(ggtt);
|
|
|
|
else
|
|
|
|
ret = gen8_gmch_probe(ggtt);
|
2013-01-25 05:45:00 +07:00
|
|
|
if (ret)
|
2013-01-25 04:49:57 +07:00
|
|
|
return ret;
|
|
|
|
|
2018-06-05 22:37:58 +07:00
|
|
|
if ((ggtt->vm.total - 1) >> 32) {
|
2016-03-18 15:42:59 +07:00
|
|
|
DRM_ERROR("We never expected a Global GTT with more than 32bits"
|
2016-08-04 13:52:23 +07:00
|
|
|
" of address space! Found %lldM!\n",
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.total >> 20);
|
|
|
|
ggtt->vm.total = 1ULL << 32;
|
|
|
|
ggtt->mappable_end =
|
|
|
|
min_t(u64, ggtt->mappable_end, ggtt->vm.total);
|
2016-03-18 15:42:59 +07:00
|
|
|
}
|
|
|
|
|
2018-06-05 22:37:58 +07:00
|
|
|
if (ggtt->mappable_end > ggtt->vm.total) {
|
2016-08-04 13:52:23 +07:00
|
|
|
DRM_ERROR("mappable aperture extends past end of GGTT,"
|
2017-12-11 22:18:22 +07:00
|
|
|
" aperture=%pa, total=%llx\n",
|
2018-06-05 22:37:58 +07:00
|
|
|
&ggtt->mappable_end, ggtt->vm.total);
|
|
|
|
ggtt->mappable_end = ggtt->vm.total;
|
2016-08-04 13:52:23 +07:00
|
|
|
}
|
|
|
|
|
2013-01-25 04:49:57 +07:00
|
|
|
/* GMADR is the PCI mmio aperture into the global GTT. */
|
2018-06-05 22:37:58 +07:00
|
|
|
DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
|
2017-12-11 22:18:20 +07:00
|
|
|
DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
|
2017-12-12 18:35:32 +07:00
|
|
|
DRM_DEBUG_DRIVER("DSM size = %lluM\n",
|
2017-12-11 22:18:18 +07:00
|
|
|
(u64)resource_size(&intel_graphics_stolen_res) >> 20);
|
2019-06-21 14:07:56 +07:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* i915_ggtt_probe_hw - Probe GGTT hardware location
|
2019-06-21 20:16:40 +07:00
|
|
|
* @i915: i915 device
|
2019-06-21 14:07:56 +07:00
|
|
|
*/
|
|
|
|
int i915_ggtt_probe_hw(struct drm_i915_private *i915)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
2019-06-21 14:07:59 +07:00
|
|
|
ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
|
2019-06-21 14:07:56 +07:00
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2017-05-25 19:16:12 +07:00
|
|
|
if (intel_vtd_active())
|
2019-08-15 16:36:04 +07:00
|
|
|
dev_info(i915->drm.dev, "VT-d active for gfx access\n");
|
2013-01-25 04:49:57 +07:00
|
|
|
|
|
|
|
return 0;
|
2016-08-04 13:52:21 +07:00
|
|
|
}
|
|
|
|
|
2019-06-21 14:07:57 +07:00
|
|
|
static int ggtt_init_hw(struct i915_ggtt *ggtt)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *i915 = ggtt->vm.i915;
|
2018-07-05 01:55:18 +07:00
|
|
|
|
2019-01-15 04:59:56 +07:00
|
|
|
i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
|
2018-07-13 01:53:11 +07:00
|
|
|
|
2018-08-31 21:36:43 +07:00
|
|
|
ggtt->vm.is_ggtt = true;
|
|
|
|
|
2018-07-13 01:53:11 +07:00
|
|
|
/* Only VLV supports read-only GGTT mappings */
|
2019-06-21 14:07:57 +07:00
|
|
|
ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
|
2018-07-13 01:53:11 +07:00
|
|
|
|
2019-06-21 14:07:57 +07:00
|
|
|
if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
|
2019-09-09 19:40:51 +07:00
|
|
|
ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
|
2016-08-04 13:52:23 +07:00
|
|
|
|
2019-06-21 14:07:57 +07:00
|
|
|
if (!io_mapping_init_wc(&ggtt->iomap,
|
|
|
|
ggtt->gmadr.start,
|
|
|
|
ggtt->mappable_end)) {
|
2019-06-21 14:08:04 +07:00
|
|
|
ggtt->vm.cleanup(&ggtt->vm);
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
return -EIO;
|
2016-08-04 13:52:23 +07:00
|
|
|
}
|
|
|
|
|
2017-12-11 22:18:20 +07:00
|
|
|
ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
|
2016-08-04 13:52:23 +07:00
|
|
|
|
2019-06-13 14:32:54 +07:00
|
|
|
i915_ggtt_init_fences(ggtt);
|
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
return 0;
|
2019-06-21 14:07:57 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* i915_ggtt_init_hw - Initialize GGTT hardware
|
|
|
|
* @dev_priv: i915 device
|
|
|
|
*/
|
|
|
|
int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
stash_init(&dev_priv->mm.wc_stash);
|
|
|
|
|
|
|
|
/* Note that we use page colouring to enforce a guard page at the
|
|
|
|
* end of the address space. This is required as the CS may prefetch
|
|
|
|
* beyond the end of the batch buffer, across the page boundary,
|
|
|
|
* and beyond the end of the GTT if we do not provide a guard.
|
|
|
|
*/
|
|
|
|
ret = ggtt_init_hw(&dev_priv->ggtt);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2016-08-04 13:52:21 +07:00
|
|
|
/*
|
|
|
|
* Initialise stolen early so that we may reserve preallocated
|
|
|
|
* objects for the BIOS to KMS transition.
|
|
|
|
*/
|
2016-11-16 15:55:35 +07:00
|
|
|
ret = i915_gem_init_stolen(dev_priv);
|
2016-08-04 13:52:21 +07:00
|
|
|
if (ret)
|
|
|
|
goto out_gtt_cleanup;
|
|
|
|
|
|
|
|
return 0;
|
2016-01-19 20:26:32 +07:00
|
|
|
|
|
|
|
out_gtt_cleanup:
|
2019-06-21 14:08:04 +07:00
|
|
|
dev_priv->ggtt.vm.cleanup(&dev_priv->ggtt.vm);
|
2016-01-19 20:26:32 +07:00
|
|
|
return ret;
|
2013-01-25 04:49:57 +07:00
|
|
|
}
|
drm/i915: Create bind/unbind abstraction for VMAs
To sum up what goes on here, we abstract the vma binding, similarly to
the previous object binding. This helps for distinguishing legacy
binding, versus modern binding. To keep the code churn as minimal as
possible, I am leaving in insert_entries(). It serves as the per
platform pte writing basically. bind_vma and insert_entries do share a
lot of similarities, and I did have designs to combine the two, but as
mentioned already... too much churn in an already massive patchset.
What follows are the 3 commits which existed discretely in the original
submissions. Upon rebasing on Broadwell support, it became clear that
separation was not good, and only made for more error prone code. Below
are the 3 commit messages with all their history.
drm/i915: Add bind/unbind object functions to VMA
drm/i915: Use the new vm [un]bind functions
drm/i915: reduce vm->insert_entries() usage
drm/i915: Add bind/unbind object functions to VMA
As we plumb the code with more VM information, it has become more
obvious that the easiest way to deal with bind and unbind is to simply
put the function pointers in the vm, and let those choose the correct
way to handle the page table updates. This change allows many places in
the code to simply be vm->bind, and not have to worry about
distinguishing PPGTT vs GGTT.
Notice that this patch has no impact on functionality. I've decided to
save the actual change until the next patch because I think it's easier
to review that way. I'm happy to squash the two, or let Daniel do it on
merge.
v2:
Make ggtt handle the quirky aliasing ppgtt
Add flags to bind object to support above
Don't ever call bind/unbind directly for PPGTT until we have real, full
PPGTT (use NULLs to assert this)
Make sure we rebind the ggtt if there already is a ggtt binding. This
happens on set cache levels.
Use VMA for bind/unbind (Daniel, Ben)
v3: Reorganize ggtt_vma_bind to be more concise and easier to read
(Ville). Change logic in unbind to only unbind ggtt when there is a
global mapping, and to remove a redundant check if the aliasing ppgtt
exists.
v4: Make the bind function a bit smarter about the cache levels to avoid
unnecessary multiple remaps. "I accept it is a wart, I think unifying
the pin_vma / bind_vma could be unified later" (Chris)
Removed the git notes, and put version info here. (Daniel)
v5: Update the comment to not suck (Chris)
v6:
Move bind/unbind to the VMA. It makes more sense in the VMA structure
(always has, but I was previously lazy). With this change, it will allow
us to keep a distinct insert_entries.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: Use the new vm [un]bind functions
Building on the last patch which created the new function pointers in
the VM for bind/unbind, here we actually put those new function pointers
to use.
Split out as a separate patch to aid in review. I'm fine with squashing
into the previous patch if people request it.
v2: Updated to address the smart ggtt which can do aliasing as needed
Make sure we bind to global gtt when mappable and fenceable. I thought
we could get away without this initially, but we cannot.
v3: Make the global GTT binding explicitly use the ggtt VM for
bind_vma(). While at it, use the new ggtt_vma helper (Chris)
At this point the original mailing list thread diverges. ie.
v4^:
use target_obj instead of obj for gen6 relocate_entry
vma->bind_vma() can be called safely during pin. So simply do that
instead of the complicated conditionals.
Don't restore PPGTT bound objects on resume path
Bug fix in resume path for globally bound BOs
Properly handle secure dispatch
Rebased on vma bind/unbind conversion
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
drm/i915: reduce vm->insert_entries() usage
FKA: drm/i915: eliminate vm->insert_entries()
With bind/unbind function pointers in place, we no longer need
insert_entries. We could, and want to, remove clear_range; however, it's
not totally easy at this point, since it's still used in a couple of
places that don't only deal in objects: setup, ppgtt init, and restore
gtt mappings.
v2: Don't actually remove insert_entries, just limit its usage. It will
be useful when we introduce gen8. It will always be called from the vma
bind/unbind.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-12-07 05:10:56 +07:00
|
|
|
|
2016-08-04 13:52:22 +07:00
|
|
|
int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
|
2016-05-07 01:35:55 +07:00
|
|
|
{
|
2016-08-04 13:52:22 +07:00
|
|
|
if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
|
2016-05-07 01:35:55 +07:00
|
|
|
return -EIO;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-07-13 17:00:14 +07:00
|
|
|
void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
|
2017-01-12 18:00:49 +07:00
|
|
|
{
|
2019-06-21 14:07:58 +07:00
|
|
|
GEM_BUG_ON(ggtt->invalidate != gen6_ggtt_invalidate);
|
2017-12-14 05:13:49 +07:00
|
|
|
|
2019-06-21 14:07:58 +07:00
|
|
|
ggtt->invalidate = guc_ggtt_invalidate;
|
|
|
|
|
|
|
|
ggtt->invalidate(ggtt);
|
2017-01-12 18:00:49 +07:00
|
|
|
}
|
|
|
|
|
2019-07-13 17:00:14 +07:00
|
|
|
void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
|
2017-01-12 18:00:49 +07:00
|
|
|
{
|
2018-07-20 16:51:44 +07:00
|
|
|
/* XXX Temporary pardon for error unload */
|
2019-06-21 14:07:58 +07:00
|
|
|
if (ggtt->invalidate == gen6_ggtt_invalidate)
|
2018-07-20 16:51:44 +07:00
|
|
|
return;
|
|
|
|
|
2017-06-01 16:04:46 +07:00
|
|
|
/* We should only be called after i915_ggtt_enable_guc() */
|
2019-06-21 14:07:58 +07:00
|
|
|
GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);
|
2017-06-01 16:04:46 +07:00
|
|
|
|
2019-06-21 14:07:58 +07:00
|
|
|
ggtt->invalidate = gen6_ggtt_invalidate;
|
2017-12-14 05:13:49 +07:00
|
|
|
|
2019-06-21 14:07:58 +07:00
|
|
|
ggtt->invalidate(ggtt);
|
2017-01-12 18:00:49 +07:00
|
|
|
}
|
|
|
|
|
2019-06-21 14:08:00 +07:00
|
|
|
static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
|
2015-04-14 22:35:23 +07:00
|
|
|
{
|
2018-06-05 15:28:56 +07:00
|
|
|
struct i915_vma *vma, *vn;
|
2019-08-20 03:07:05 +07:00
|
|
|
bool flush = false;
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
int open;
|
2015-04-14 22:35:23 +07:00
|
|
|
|
2019-06-21 14:08:00 +07:00
|
|
|
intel_gt_check_and_clear_faults(ggtt->vm.gt);
|
2015-04-14 22:35:23 +07:00
|
|
|
|
2019-01-28 17:23:53 +07:00
|
|
|
mutex_lock(&ggtt->vm.mutex);
|
|
|
|
|
2015-04-14 22:35:23 +07:00
|
|
|
/* First fill our portion of the GTT with scratch pages */
|
2018-06-05 22:37:58 +07:00
|
|
|
ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
|
|
|
|
/* Skip rewriting PTE on VMA unbind. */
|
|
|
|
open = atomic_xchg(&ggtt->vm.open, 0);
|
2016-09-10 03:19:57 +07:00
|
|
|
|
|
|
|
/* clflush objects bound into the GGTT and rebind them. */
|
2019-01-28 17:23:52 +07:00
|
|
|
list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
|
2018-06-05 15:28:56 +07:00
|
|
|
struct drm_i915_gem_object *obj = vma->obj;
|
2016-09-10 03:19:57 +07:00
|
|
|
|
2019-09-11 16:02:43 +07:00
|
|
|
if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
|
2018-06-05 15:28:56 +07:00
|
|
|
continue;
|
2016-09-10 03:19:57 +07:00
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
if (!__i915_vma_unbind(vma))
|
|
|
|
continue;
|
2015-07-06 21:15:01 +07:00
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
|
2018-06-07 22:40:45 +07:00
|
|
|
WARN_ON(i915_vma_bind(vma,
|
|
|
|
obj ? obj->cache_level : 0,
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
PIN_GLOBAL, NULL));
|
2019-08-20 03:07:05 +07:00
|
|
|
if (obj) { /* only used during resume => exclusive access */
|
|
|
|
flush |= fetch_and_zero(&obj->write_domain);
|
|
|
|
obj->read_domains |= I915_GEM_DOMAIN_GTT;
|
2019-05-28 16:29:51 +07:00
|
|
|
}
|
2015-07-06 21:15:01 +07:00
|
|
|
}
|
2015-04-14 22:35:23 +07:00
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
atomic_set(&ggtt->vm.open, open);
|
2019-06-21 14:07:58 +07:00
|
|
|
ggtt->invalidate(ggtt);
|
2016-09-10 03:19:57 +07:00
|
|
|
|
2019-01-28 17:23:53 +07:00
|
|
|
mutex_unlock(&ggtt->vm.mutex);
|
2019-08-20 03:07:05 +07:00
|
|
|
|
|
|
|
if (flush)
|
|
|
|
wbinvd_on_all_cpus();
|
2019-06-21 14:08:00 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
|
|
|
|
{
|
|
|
|
ggtt_restore_mappings(&i915->ggtt);
|
2019-01-28 17:23:53 +07:00
|
|
|
|
2019-07-02 18:31:48 +07:00
|
|
|
if (INTEL_GEN(i915) >= 8)
|
|
|
|
setup_private_pat(i915);
|
2015-04-14 22:35:23 +07:00
|
|
|
}
|
|
|
|
|
2015-09-21 16:45:33 +07:00
|
|
|
static struct scatterlist *
|
2018-10-16 22:04:13 +07:00
|
|
|
rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
|
2015-09-21 16:45:33 +07:00
|
|
|
unsigned int width, unsigned int height,
|
2016-01-21 02:05:23 +07:00
|
|
|
unsigned int stride,
|
2015-09-21 16:45:33 +07:00
|
|
|
struct sg_table *st, struct scatterlist *sg)
|
2015-03-23 18:10:36 +07:00
|
|
|
{
|
|
|
|
unsigned int column, row;
|
|
|
|
unsigned int src_idx;
|
|
|
|
|
|
|
|
for (column = 0; column < width; column++) {
|
2018-10-16 22:04:13 +07:00
|
|
|
src_idx = stride * (height - 1) + column + offset;
|
2015-03-23 18:10:36 +07:00
|
|
|
for (row = 0; row < height; row++) {
|
|
|
|
st->nents++;
|
|
|
|
/* We don't need the pages, but need to initialize
|
|
|
|
* the entries so the sg list can be happily traversed.
|
|
|
|
* The only thing we need are DMA addresses.
|
|
|
|
*/
|
2018-09-13 22:04:05 +07:00
|
|
|
sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
|
2018-10-16 22:04:13 +07:00
|
|
|
sg_dma_address(sg) =
|
|
|
|
i915_gem_object_get_dma_address(obj, src_idx);
|
2018-09-13 22:04:05 +07:00
|
|
|
sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
|
2015-03-23 18:10:36 +07:00
|
|
|
sg = sg_next(sg);
|
2016-01-21 02:05:23 +07:00
|
|
|
src_idx -= stride;
|
2015-03-23 18:10:36 +07:00
|
|
|
}
|
|
|
|
}
|
2015-09-21 16:45:33 +07:00
|
|
|
|
|
|
|
return sg;
|
2015-03-23 18:10:36 +07:00
|
|
|
}
|
|
|
|
|
2017-02-15 15:43:35 +07:00
|
|
|
static noinline struct sg_table *
|
|
|
|
intel_rotate_pages(struct intel_rotation_info *rot_info,
|
|
|
|
struct drm_i915_gem_object *obj)
|
2015-03-23 18:10:36 +07:00
|
|
|
{
|
drm/i915: Rewrite fb rotation GTT handling
Redo the fb rotation handling in order to:
- eliminate the NV12 special casing
- handle fb->offsets[] properly
- make the rotation handling easier for the plane code
To achieve these goals we reduce intel_rotation_info to only contain
(for each plane) the rotated view width,height,stride in tile units,
and the page offset into the object where the plane starts. Each plane
is handled exactly the same way, no special casing for NV12 or other
formats. We then store the computed rotation_info under
intel_framebuffer so that we don't have to recompute it again.
To handle fb->offsets[] we treat them as a linear offsets and convert
them to x/y offsets from the start of the relevant GTT mapping (either
normal or rotated). We store the x/y offsets under intel_framebuffer,
and for some extra convenience we also store the rotated pitch (ie.
tile aligned plane height). So for each plane we have the normal
x/y offsets, rotated x/y offsets, and the rotated pitch. The normal
pitch is available already in fb->pitches[].
While we're gathering up all that extra information, we can also easily
compute the storage requirements for the framebuffer, so that we can
check that the object is big enough to hold it.
When it comes time to deal with the plane source coordinates, we first
rotate the clipped src coordinates to match the relevant GTT view
orientation, then add to them the fb x/y offsets. Next we compute
the aligned surface page offset, and as a result we're left with some
residual x/y offsets. Finally, if required by the hardware, we convert
the remaining x/y offsets into a linear offset.
For gen2/3 we simply skip computing the final page offset, and just
convert the src+fb x/y offsets directly into a linear offset since
that's what the hardware wants.
After this all platforms, including SKL+, compute these things in exactly
the same way (excluding alignment differences).
v2: Use BIT(DRM_ROTATE_270) instead of ROTATE_270 when rotating
plane src coordinates
Drop some spurious changes that got left behind during
development
v3: Split out more changes to prep patches (Daniel)
s/intel_fb->plane[].foo.bar/intel_fb->foo[].bar/ for brevity
Rename intel_surf_gtt_offset to intel_fb_gtt_offset
Kill the pointless 'plane' parameter from intel_fb_gtt_offset()
v4: Fix alignment vs. alignment-1 when calling
_intel_compute_tile_offset() from intel_fill_fb_info()
Pass the pitch in tiles instead
of pixels to intel_adjust_tile_offset() from intel_fill_fb_info()
Pass the full width/height of the rotated area to
drm_rect_rotate() for clarity
Use u32 for more offsets
v5: Preserve the upper_32_bits()/lower_32_bits() handling for the
fb ggtt offset (Sivakumar)
v6: Rebase due to drm_plane_state src/dst rects
Cc: Sivakumar Thulasimani <sivakumar.thulasimani@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Sivakumar Thulasimani <sivakumar.thulasimani@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470821001-25272-2-git-send-email-ville.syrjala@linux.intel.com
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-09-15 17:16:41 +07:00
|
|
|
unsigned int size = intel_rotation_info_size(rot_info);
|
2015-03-23 18:10:36 +07:00
|
|
|
struct sg_table *st;
|
2015-09-21 16:45:34 +07:00
|
|
|
struct scatterlist *sg;
|
2015-03-25 17:15:26 +07:00
|
|
|
int ret = -ENOMEM;
|
2018-10-16 22:04:13 +07:00
|
|
|
int i;
|
2015-03-23 18:10:36 +07:00
|
|
|
|
|
|
|
/* Allocate target SG list. */
|
|
|
|
st = kmalloc(sizeof(*st), GFP_KERNEL);
|
|
|
|
if (!st)
|
|
|
|
goto err_st_alloc;
|
|
|
|
|
drm/i915: Rewrite fb rotation GTT handling
Redo the fb rotation handling in order to:
- eliminate the NV12 special casing
- handle fb->offsets[] properly
- make the rotation handling easier for the plane code
To achieve these goals we reduce intel_rotation_info to only contain
(for each plane) the rotated view width,height,stride in tile units,
and the page offset into the object where the plane starts. Each plane
is handled exactly the same way, no special casing for NV12 or other
formats. We then store the computed rotation_info under
intel_framebuffer so that we don't have to recompute it again.
To handle fb->offsets[] we treat them as a linear offsets and convert
them to x/y offsets from the start of the relevant GTT mapping (either
normal or rotated). We store the x/y offsets under intel_framebuffer,
and for some extra convenience we also store the rotated pitch (ie.
tile aligned plane height). So for each plane we have the normal
x/y offsets, rotated x/y offsets, and the rotated pitch. The normal
pitch is available already in fb->pitches[].
While we're gathering up all that extra information, we can also easily
compute the storage requirements for the framebuffer, so that we can
check that the object is big enough to hold it.
When it comes time to deal with the plane source coordinates, we first
rotate the clipped src coordinates to match the relevant GTT view
orientation, then add to them the fb x/y offsets. Next we compute
the aligned surface page offset, and as a result we're left with some
residual x/y offsets. Finally, if required by the hardware, we convert
the remaining x/y offsets into a linear offset.
For gen2/3 we simply skip computing the final page offset, and just
convert the src+fb x/y offsets directly into a linear offset since
that's what the hardware wants.
After this all platforms, including SKL+, compute these things in exactly
the same way (excluding alignment differences).
v2: Use BIT(DRM_ROTATE_270) instead of ROTATE_270 when rotating
plane src coordinates
Drop some spurious changes that got left behind during
development
v3: Split out more changes to prep patches (Daniel)
s/intel_fb->plane[].foo.bar/intel_fb->foo[].bar/ for brevity
Rename intel_surf_gtt_offset to intel_fb_gtt_offset
Kill the pointless 'plane' parameter from intel_fb_gtt_offset()
v4: Fix alignment vs. alignment-1 when calling
_intel_compute_tile_offset() from intel_fill_fb_info()
Pass the pitch in tiles instead
of pixels to intel_adjust_tile_offset() from intel_fill_fb_info()
Pass the full width/height of the rotated area to
drm_rect_rotate() for clarity
Use u32 for more offsets
v5: Preserve the upper_32_bits()/lower_32_bits() handling for the
fb ggtt offset (Sivakumar)
v6: Rebase due to drm_plane_state src/dst rects
Cc: Sivakumar Thulasimani <sivakumar.thulasimani@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Sivakumar Thulasimani <sivakumar.thulasimani@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470821001-25272-2-git-send-email-ville.syrjala@linux.intel.com
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-09-15 17:16:41 +07:00
|
|
|
ret = sg_alloc_table(st, size, GFP_KERNEL);
|
2015-03-23 18:10:36 +07:00
|
|
|
if (ret)
|
|
|
|
goto err_sg_alloc;
|
|
|
|
|
2016-02-16 03:54:46 +07:00
|
|
|
st->nents = 0;
|
|
|
|
sg = st->sgl;
|
|
|
|
|
drm/i915: Rewrite fb rotation GTT handling
Redo the fb rotation handling in order to:
- eliminate the NV12 special casing
- handle fb->offsets[] properly
- make the rotation handling easier for the plane code
To achieve these goals we reduce intel_rotation_info to only contain
(for each plane) the rotated view width,height,stride in tile units,
and the page offset into the object where the plane starts. Each plane
is handled exactly the same way, no special casing for NV12 or other
formats. We then store the computed rotation_info under
intel_framebuffer so that we don't have to recompute it again.
To handle fb->offsets[] we treat them as a linear offsets and convert
them to x/y offsets from the start of the relevant GTT mapping (either
normal or rotated). We store the x/y offsets under intel_framebuffer,
and for some extra convenience we also store the rotated pitch (ie.
tile aligned plane height). So for each plane we have the normal
x/y offsets, rotated x/y offsets, and the rotated pitch. The normal
pitch is available already in fb->pitches[].
While we're gathering up all that extra information, we can also easily
compute the storage requirements for the framebuffer, so that we can
check that the object is big enough to hold it.
When it comes time to deal with the plane source coordinates, we first
rotate the clipped src coordinates to match the relevant GTT view
orientation, then add to them the fb x/y offsets. Next we compute
the aligned surface page offset, and as a result we're left with some
residual x/y offsets. Finally, if required by the hardware, we convert
the remaining x/y offsets into a linear offset.
For gen2/3 we simply skip computing the final page offset, and just
convert the src+fb x/y offsets directly into a linear offset since
that's what the hardware wants.
After this all platforms, including SKL+, compute these things in exactly
the same way (excluding alignment differences).
v2: Use BIT(DRM_ROTATE_270) instead of ROTATE_270 when rotating
plane src coordinates
Drop some spurious changes that got left behind during
development
v3: Split out more changes to prep patches (Daniel)
s/intel_fb->plane[].foo.bar/intel_fb->foo[].bar/ for brevity
Rename intel_surf_gtt_offset to intel_fb_gtt_offset
Kill the pointless 'plane' parameter from intel_fb_gtt_offset()
v4: Fix alignment vs. alignment-1 when calling
_intel_compute_tile_offset() from intel_fill_fb_info()
Pass the pitch in tiles instead
of pixels to intel_adjust_tile_offset() from intel_fill_fb_info()
Pass the full width/height of the rotated area to
drm_rect_rotate() for clarity
Use u32 for more offsets
v5: Preserve the upper_32_bits()/lower_32_bits() handling for the
fb ggtt offset (Sivakumar)
v6: Rebase due to drm_plane_state src/dst rects
Cc: Sivakumar Thulasimani <sivakumar.thulasimani@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Sivakumar Thulasimani <sivakumar.thulasimani@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470821001-25272-2-git-send-email-ville.syrjala@linux.intel.com
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-09-15 17:16:41 +07:00
|
|
|
for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
|
2018-10-16 22:04:13 +07:00
|
|
|
sg = rotate_pages(obj, rot_info->plane[i].offset,
|
drm/i915: Rewrite fb rotation GTT handling
Redo the fb rotation handling in order to:
- eliminate the NV12 special casing
- handle fb->offsets[] properly
- make the rotation handling easier for the plane code
To achieve these goals we reduce intel_rotation_info to only contain
(for each plane) the rotated view width,height,stride in tile units,
and the page offset into the object where the plane starts. Each plane
is handled exactly the same way, no special casing for NV12 or other
formats. We then store the computed rotation_info under
intel_framebuffer so that we don't have to recompute it again.
To handle fb->offsets[] we treat them as a linear offsets and convert
them to x/y offsets from the start of the relevant GTT mapping (either
normal or rotated). We store the x/y offsets under intel_framebuffer,
and for some extra convenience we also store the rotated pitch (ie.
tile aligned plane height). So for each plane we have the normal
x/y offsets, rotated x/y offsets, and the rotated pitch. The normal
pitch is available already in fb->pitches[].
While we're gathering up all that extra information, we can also easily
compute the storage requirements for the framebuffer, so that we can
check that the object is big enough to hold it.
When it comes time to deal with the plane source coordinates, we first
rotate the clipped src coordinates to match the relevant GTT view
orientation, then add to them the fb x/y offsets. Next we compute
the aligned surface page offset, and as a result we're left with some
residual x/y offsets. Finally, if required by the hardware, we convert
the remaining x/y offsets into a linear offset.
For gen2/3 we simply skip computing the final page offset, and just
convert the src+fb x/y offsets directly into a linear offset since
that's what the hardware wants.
After this all platforms, including SKL+, compute these things in exactly
the same way (excluding alignment differences).
v2: Use BIT(DRM_ROTATE_270) instead of ROTATE_270 when rotating
plane src coordinates
Drop some spurious changes that got left behind during
development
v3: Split out more changes to prep patches (Daniel)
s/intel_fb->plane[].foo.bar/intel_fb->foo[].bar/ for brevity
Rename intel_surf_gtt_offset to intel_fb_gtt_offset
Kill the pointless 'plane' parameter from intel_fb_gtt_offset()
v4: Fix alignment vs. alignment-1 when calling
_intel_compute_tile_offset() from intel_fill_fb_info()
Pass the pitch in tiles instead of pixels to intel_adjust_tile_offset() from intel_fill_fb_info()
Pass the full width/height of the rotated area to
drm_rect_rotate() for clarity
Use u32 for more offsets
v5: Preserve the upper_32_bits()/lower_32_bits() handling for the
fb ggtt offset (Sivakumar)
v6: Rebase due to drm_plane_state src/dst rects
Cc: Sivakumar Thulasimani <sivakumar.thulasimani@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Sivakumar Thulasimani <sivakumar.thulasimani@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470821001-25272-2-git-send-email-ville.syrjala@linux.intel.com
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-09-15 17:16:41 +07:00
|
|
|
rot_info->plane[i].width, rot_info->plane[i].height,
|
|
|
|
rot_info->plane[i].stride, st, sg);
|
2015-09-21 16:45:34 +07:00
|
|
|
}
|
|
|
|
|
2015-03-23 18:10:36 +07:00
|
|
|
return st;
|
|
|
|
|
|
|
|
err_sg_alloc:
|
|
|
|
kfree(st);
|
|
|
|
err_st_alloc:
|
|
|
|
|
2017-11-22 21:56:46 +07:00
|
|
|
DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
|
|
|
|
obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
|
drm/i915: Rewrite fb rotation GTT handling
Redo the fb rotation handling in order to:
- eliminate the NV12 special casing
- handle fb->offsets[] properly
- make the rotation handling easier for the plane code
To achieve these goals we reduce intel_rotation_info to only contain
(for each plane) the rotated view width,height,stride in tile units,
and the page offset into the object where the plane starts. Each plane
is handled exactly the same way, no special casing for NV12 or other
formats. We then store the computed rotation_info under
intel_framebuffer so that we don't have to recompute it again.
To handle fb->offsets[] we treat them as a linear offsets and convert
them to x/y offsets from the start of the relevant GTT mapping (either
normal or rotated). We store the x/y offsets under intel_framebuffer,
and for some extra convenience we also store the rotated pitch (ie.
tile aligned plane height). So for each plane we have the normal
x/y offsets, rotated x/y offsets, and the rotated pitch. The normal
pitch is available already in fb->pitches[].
While we're gathering up all that extra information, we can also easily
compute the storage requirements for the framebuffer, so that we can
check that the object is big enough to hold it.
When it comes time to deal with the plane source coordinates, we first
rotate the clipped src coordinates to match the relevant GTT view
orientation, then add to them the fb x/y offsets. Next we compute
the aligned surface page offset, and as a result we're left with some
residual x/y offsets. Finally, if required by the hardware, we convert
the remaining x/y offsets into a linear offset.
For gen2/3 we simply skip computing the final page offset, and just
convert the src+fb x/y offsets directly into a linear offset since
that's what the hardware wants.
After this all platforms, including SKL+, compute these things in exactly
the same way (excluding alignment differences).
v2: Use BIT(DRM_ROTATE_270) instead of ROTATE_270 when rotating
plane src coordinates
Drop some spurious changes that got left behind during
development
v3: Split out more changes to prep patches (Daniel)
s/intel_fb->plane[].foo.bar/intel_fb->foo[].bar/ for brevity
Rename intel_surf_gtt_offset to intel_fb_gtt_offset
Kill the pointless 'plane' parameter from intel_fb_gtt_offset()
v4: Fix alignment vs. alignment-1 when calling
_intel_compute_tile_offset() from intel_fill_fb_info()
Pass the pitch in tiles instead of pixels to intel_adjust_tile_offset() from intel_fill_fb_info()
Pass the full width/height of the rotated area to
drm_rect_rotate() for clarity
Use u32 for more offsets
v5: Preserve the upper_32_bits()/lower_32_bits() handling for the
fb ggtt offset (Sivakumar)
v6: Rebase due to drm_plane_state src/dst rects
Cc: Sivakumar Thulasimani <sivakumar.thulasimani@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Sivakumar Thulasimani <sivakumar.thulasimani@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1470821001-25272-2-git-send-email-ville.syrjala@linux.intel.com
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2015-09-15 17:16:41 +07:00
|
|
|
|
2015-03-23 18:10:36 +07:00
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|
2015-03-16 19:11:13 +07:00
|
|
|
|
2019-05-09 19:21:52 +07:00
|
|
|
static struct scatterlist *
|
|
|
|
remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
|
|
|
|
unsigned int width, unsigned int height,
|
|
|
|
unsigned int stride,
|
|
|
|
struct sg_table *st, struct scatterlist *sg)
|
|
|
|
{
|
|
|
|
unsigned int row;
|
|
|
|
|
|
|
|
for (row = 0; row < height; row++) {
|
|
|
|
unsigned int left = width * I915_GTT_PAGE_SIZE;
|
|
|
|
|
|
|
|
while (left) {
|
|
|
|
dma_addr_t addr;
|
|
|
|
unsigned int length;
|
|
|
|
|
|
|
|
/* We don't need the pages, but need to initialize
|
|
|
|
* the entries so the sg list can be happily traversed.
|
|
|
|
* The only thing we need are DMA addresses.
|
|
|
|
*/
|
|
|
|
|
|
|
|
addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
|
|
|
|
|
|
|
|
length = min(left, length);
|
|
|
|
|
|
|
|
st->nents++;
|
|
|
|
|
|
|
|
sg_set_page(sg, NULL, length, 0);
|
|
|
|
sg_dma_address(sg) = addr;
|
|
|
|
sg_dma_len(sg) = length;
|
|
|
|
sg = sg_next(sg);
|
|
|
|
|
|
|
|
offset += length / I915_GTT_PAGE_SIZE;
|
|
|
|
left -= length;
|
|
|
|
}
|
|
|
|
|
|
|
|
offset += stride - width;
|
|
|
|
}
|
|
|
|
|
|
|
|
return sg;
|
|
|
|
}
|
|
|
|
|
|
|
|
static noinline struct sg_table *
|
|
|
|
intel_remap_pages(struct intel_remapped_info *rem_info,
|
|
|
|
struct drm_i915_gem_object *obj)
|
|
|
|
{
|
|
|
|
unsigned int size = intel_remapped_info_size(rem_info);
|
|
|
|
struct sg_table *st;
|
|
|
|
struct scatterlist *sg;
|
|
|
|
int ret = -ENOMEM;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* Allocate target SG list. */
|
|
|
|
st = kmalloc(sizeof(*st), GFP_KERNEL);
|
|
|
|
if (!st)
|
|
|
|
goto err_st_alloc;
|
|
|
|
|
|
|
|
ret = sg_alloc_table(st, size, GFP_KERNEL);
|
|
|
|
if (ret)
|
|
|
|
goto err_sg_alloc;
|
|
|
|
|
|
|
|
st->nents = 0;
|
|
|
|
sg = st->sgl;
|
|
|
|
|
|
|
|
for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
|
|
|
|
sg = remap_pages(obj, rem_info->plane[i].offset,
|
|
|
|
rem_info->plane[i].width, rem_info->plane[i].height,
|
|
|
|
rem_info->plane[i].stride, st, sg);
|
|
|
|
}
|
|
|
|
|
|
|
|
i915_sg_trim(st);
|
|
|
|
|
|
|
|
return st;
|
|
|
|
|
|
|
|
err_sg_alloc:
|
|
|
|
kfree(st);
|
|
|
|
err_st_alloc:
|
|
|
|
|
|
|
|
DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
|
|
|
|
obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
|
|
|
|
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|
|
|
|
|
2017-02-15 15:43:35 +07:00
|
|
|
static noinline struct sg_table *
|
2015-05-06 18:35:38 +07:00
|
|
|
intel_partial_pages(const struct i915_ggtt_view *view,
|
|
|
|
struct drm_i915_gem_object *obj)
|
|
|
|
{
|
|
|
|
struct sg_table *st;
|
2016-10-28 19:58:34 +07:00
|
|
|
struct scatterlist *sg, *iter;
|
2017-01-14 07:28:25 +07:00
|
|
|
unsigned int count = view->partial.size;
|
2016-10-28 19:58:34 +07:00
|
|
|
unsigned int offset;
|
2015-05-06 18:35:38 +07:00
|
|
|
int ret = -ENOMEM;
|
|
|
|
|
|
|
|
st = kmalloc(sizeof(*st), GFP_KERNEL);
|
|
|
|
if (!st)
|
|
|
|
goto err_st_alloc;
|
|
|
|
|
2016-10-28 19:58:34 +07:00
|
|
|
ret = sg_alloc_table(st, count, GFP_KERNEL);
|
2015-05-06 18:35:38 +07:00
|
|
|
if (ret)
|
|
|
|
goto err_sg_alloc;
|
|
|
|
|
2017-01-14 07:28:25 +07:00
|
|
|
iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
|
2016-10-28 19:58:34 +07:00
|
|
|
GEM_BUG_ON(!iter);
|
|
|
|
|
2015-05-06 18:35:38 +07:00
|
|
|
sg = st->sgl;
|
|
|
|
st->nents = 0;
|
2016-10-28 19:58:34 +07:00
|
|
|
do {
|
|
|
|
unsigned int len;
|
2015-05-06 18:35:38 +07:00
|
|
|
|
2016-10-28 19:58:34 +07:00
|
|
|
len = min(iter->length - (offset << PAGE_SHIFT),
|
|
|
|
count << PAGE_SHIFT);
|
|
|
|
sg_set_page(sg, NULL, len, 0);
|
|
|
|
sg_dma_address(sg) =
|
|
|
|
sg_dma_address(iter) + (offset << PAGE_SHIFT);
|
|
|
|
sg_dma_len(sg) = len;
|
2015-05-06 18:35:38 +07:00
|
|
|
|
|
|
|
st->nents++;
|
2016-10-28 19:58:34 +07:00
|
|
|
count -= len >> PAGE_SHIFT;
|
|
|
|
if (count == 0) {
|
|
|
|
sg_mark_end(sg);
|
2018-09-26 15:03:53 +07:00
|
|
|
i915_sg_trim(st); /* Drop any unused tail entries. */
|
|
|
|
|
2016-10-28 19:58:34 +07:00
|
|
|
return st;
|
|
|
|
}
|
2015-05-06 18:35:38 +07:00
|
|
|
|
2016-10-28 19:58:34 +07:00
|
|
|
sg = __sg_next(sg);
|
|
|
|
iter = __sg_next(iter);
|
|
|
|
offset = 0;
|
|
|
|
} while (1);
|
2015-05-06 18:35:38 +07:00
|
|
|
|
|
|
|
err_sg_alloc:
|
|
|
|
kfree(st);
|
|
|
|
err_st_alloc:
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
}
|
|
|
|
|
2015-04-14 22:35:27 +07:00
|
|
|
static int
|
2015-03-23 18:10:36 +07:00
|
|
|
i915_get_ggtt_vma_pages(struct i915_vma *vma)
|
2014-12-11 00:27:58 +07:00
|
|
|
{
|
2017-02-15 15:43:35 +07:00
|
|
|
int ret;
|
2015-03-23 18:10:36 +07:00
|
|
|
|
2016-11-04 17:30:01 +07:00
|
|
|
/* The vma->pages are only valid within the lifespan of the borrowed
|
|
|
|
* obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
|
|
|
|
* must be the vma->pages. A simple rule is that vma->pages must only
|
|
|
|
* be accessed when the obj->mm.pages are pinned.
|
|
|
|
*/
|
|
|
|
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
|
|
|
|
|
2017-02-15 15:43:35 +07:00
|
|
|
switch (vma->ggtt_view.type) {
|
2018-02-15 18:07:59 +07:00
|
|
|
default:
|
|
|
|
GEM_BUG_ON(vma->ggtt_view.type);
|
|
|
|
/* fall through */
|
2017-02-15 15:43:35 +07:00
|
|
|
case I915_GGTT_VIEW_NORMAL:
|
|
|
|
vma->pages = vma->obj->mm.pages;
|
2014-12-11 00:27:58 +07:00
|
|
|
return 0;
|
|
|
|
|
2017-02-15 15:43:35 +07:00
|
|
|
case I915_GGTT_VIEW_ROTATED:
|
2016-08-15 16:48:47 +07:00
|
|
|
vma->pages =
|
2017-02-15 15:43:35 +07:00
|
|
|
intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
|
|
|
|
break;
|
|
|
|
|
2019-05-09 19:21:52 +07:00
|
|
|
case I915_GGTT_VIEW_REMAPPED:
|
|
|
|
vma->pages =
|
|
|
|
intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
|
|
|
|
break;
|
|
|
|
|
2017-02-15 15:43:35 +07:00
|
|
|
case I915_GGTT_VIEW_PARTIAL:
|
2016-08-15 16:48:47 +07:00
|
|
|
vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
|
2017-02-15 15:43:35 +07:00
|
|
|
break;
|
|
|
|
}
|
2014-12-11 00:27:58 +07:00
|
|
|
|
2017-02-15 15:43:35 +07:00
|
|
|
ret = 0;
|
2019-02-21 09:08:19 +07:00
|
|
|
if (IS_ERR(vma->pages)) {
|
2016-08-15 16:48:47 +07:00
|
|
|
ret = PTR_ERR(vma->pages);
|
|
|
|
vma->pages = NULL;
|
2015-03-23 18:10:36 +07:00
|
|
|
DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
|
|
|
|
vma->ggtt_view.type, ret);
|
2014-12-11 00:27:58 +07:00
|
|
|
}
|
2015-03-23 18:10:36 +07:00
|
|
|
return ret;
|
2014-12-11 00:27:58 +07:00
|
|
|
}
|
|
|
|
|
2017-01-11 18:23:11 +07:00
|
|
|
/**
|
|
|
|
* i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
|
2017-01-12 23:45:59 +07:00
|
|
|
* @vm: the &struct i915_address_space
|
|
|
|
* @node: the &struct drm_mm_node (typically i915_vma.mode)
|
|
|
|
* @size: how much space to allocate inside the GTT,
|
|
|
|
* must be #I915_GTT_PAGE_SIZE aligned
|
|
|
|
* @offset: where to insert inside the GTT,
|
|
|
|
* must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
|
|
|
|
* (@offset + @size) must fit within the address space
|
|
|
|
* @color: color to apply to node, if this node is not from a VMA,
|
|
|
|
* color must be #I915_COLOR_UNEVICTABLE
|
|
|
|
* @flags: control search and eviction behaviour
|
2017-01-11 18:23:11 +07:00
|
|
|
*
|
|
|
|
* i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
|
|
|
|
* the address space (using @size and @color). If the @node does not fit, it
|
|
|
|
* tries to evict any overlapping nodes from the GTT, including any
|
|
|
|
* neighbouring nodes if the colors do not match (to ensure guard pages between
|
|
|
|
* differing domains). See i915_gem_evict_for_node() for the gory details
|
|
|
|
* on the eviction algorithm. #PIN_NONBLOCK may used to prevent waiting on
|
|
|
|
* evicting active overlapping objects, and any overlapping node that is pinned
|
|
|
|
* or marked as unevictable will also result in failure.
|
|
|
|
*
|
|
|
|
* Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
|
|
|
|
* asked to wait for eviction and interrupted.
|
|
|
|
*/
|
|
|
|
int i915_gem_gtt_reserve(struct i915_address_space *vm,
|
|
|
|
struct drm_mm_node *node,
|
|
|
|
u64 size, u64 offset, unsigned long color,
|
|
|
|
unsigned int flags)
|
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
|
|
|
GEM_BUG_ON(!size);
|
|
|
|
GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
|
|
|
|
GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
|
|
|
|
GEM_BUG_ON(range_overflows(offset, size, vm->total));
|
2019-07-30 21:32:08 +07:00
|
|
|
GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
|
2017-01-16 00:27:40 +07:00
|
|
|
GEM_BUG_ON(drm_mm_node_allocated(node));
|
2017-01-11 18:23:11 +07:00
|
|
|
|
|
|
|
node->size = size;
|
|
|
|
node->start = offset;
|
|
|
|
node->color = color;
|
|
|
|
|
|
|
|
err = drm_mm_reserve_node(&vm->mm, node);
|
|
|
|
if (err != -ENOSPC)
|
|
|
|
return err;
|
|
|
|
|
2017-06-16 21:05:21 +07:00
|
|
|
if (flags & PIN_NOEVICT)
|
|
|
|
return -ENOSPC;
|
|
|
|
|
2017-01-11 18:23:11 +07:00
|
|
|
err = i915_gem_evict_for_node(vm, node, flags);
|
|
|
|
if (err == 0)
|
|
|
|
err = drm_mm_reserve_node(&vm->mm, node);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2017-01-11 18:23:12 +07:00
|
|
|
/*
 * Pick a random @align-aligned offset at which a block of @len bytes can
 * be placed inside [@start, @end).
 *
 * The candidates run from round_up(@start, @align) to
 * round_down(@end - @len, @align). If those two coincide, that single
 * offset is returned without consuming any randomness.
 */
static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
{
	u64 first, last, span, rnd, rem;

	GEM_BUG_ON(range_overflows(start, len, end));

	first = round_up(start, align);
	last = round_down(end - len, align);
	GEM_BUG_ON(first > last);

	span = last - first;
	if (!span)
		return first;

	if (sizeof(unsigned long) == sizeof(u64)) {
		rnd = get_random_long();
	} else {
		/* Only draw a second 32-bit word if the span requires it. */
		rnd = get_random_int();
		if (span > U32_MAX) {
			rnd <<= 32;
			rnd |= get_random_int();
		}
	}

	/* Reduce the random value into [0, span) and realign the result. */
	div64_u64_rem(rnd, span, &rem);

	return round_up(start + rem, align);
}
|
|
|
|
|
2017-01-11 18:23:10 +07:00
|
|
|
/**
|
|
|
|
* i915_gem_gtt_insert - insert a node into an address_space (GTT)
|
2017-01-12 23:45:59 +07:00
|
|
|
* @vm: the &struct i915_address_space
|
|
|
|
* @node: the &struct drm_mm_node (typically i915_vma.node)
|
|
|
|
* @size: how much space to allocate inside the GTT,
|
|
|
|
* must be #I915_GTT_PAGE_SIZE aligned
|
|
|
|
* @alignment: required alignment of starting offset, may be 0 but
|
|
|
|
* if specified, this must be a power-of-two and at least
|
|
|
|
* #I915_GTT_MIN_ALIGNMENT
|
|
|
|
* @color: color to apply to node
|
|
|
|
* @start: start of any range restriction inside GTT (0 for all),
|
2017-01-11 18:23:10 +07:00
|
|
|
* must be #I915_GTT_PAGE_SIZE aligned
|
2017-01-12 23:45:59 +07:00
|
|
|
* @end: end of any range restriction inside GTT (U64_MAX for all),
|
|
|
|
* must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
|
|
|
|
* @flags: control search and eviction behaviour
|
2017-01-11 18:23:10 +07:00
|
|
|
*
|
|
|
|
* i915_gem_gtt_insert() first searches for an available hole into which
|
|
|
|
* is can insert the node. The hole address is aligned to @alignment and
|
|
|
|
* its @size must then fit entirely within the [@start, @end] bounds. The
|
|
|
|
* nodes on either side of the hole must match @color, or else a guard page
|
|
|
|
* will be inserted between the two nodes (or the node evicted). If no
|
2017-01-11 18:23:12 +07:00
|
|
|
* suitable hole is found, first a victim is randomly selected and tested
|
|
|
|
* for eviction, otherwise then the LRU list of objects within the GTT
|
2017-01-11 18:23:10 +07:00
|
|
|
* is scanned to find the first set of replacement nodes to create the hole.
|
|
|
|
* Those old overlapping nodes are evicted from the GTT (and so must be
|
|
|
|
* rebound before any future use). Any node that is currently pinned cannot
|
|
|
|
* be evicted (see i915_vma_pin()). Similar if the node's VMA is currently
|
|
|
|
* active and #PIN_NONBLOCK is specified, that node is also skipped when
|
|
|
|
* searching for an eviction candidate. See i915_gem_evict_something() for
|
|
|
|
* the gory details on the eviction algorithm.
|
|
|
|
*
|
|
|
|
* Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
|
|
|
|
* asked to wait for eviction and interrupted.
|
|
|
|
*/
|
|
|
|
int i915_gem_gtt_insert(struct i915_address_space *vm,
|
|
|
|
struct drm_mm_node *node,
|
|
|
|
u64 size, u64 alignment, unsigned long color,
|
|
|
|
u64 start, u64 end, unsigned int flags)
|
|
|
|
{
|
2017-02-03 04:04:38 +07:00
|
|
|
enum drm_mm_insert_mode mode;
|
2017-01-11 18:23:12 +07:00
|
|
|
u64 offset;
|
2017-01-11 18:23:10 +07:00
|
|
|
int err;
|
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 20:39:58 +07:00
|
|
|
lockdep_assert_held(&vm->mutex);
|
|
|
|
|
2017-01-11 18:23:10 +07:00
|
|
|
GEM_BUG_ON(!size);
|
|
|
|
GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
|
|
|
|
GEM_BUG_ON(alignment && !is_power_of_2(alignment));
|
|
|
|
GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
|
|
|
|
GEM_BUG_ON(start >= end);
|
|
|
|
GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
|
|
|
|
GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
|
2019-07-30 21:32:08 +07:00
|
|
|
GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
|
2017-01-16 00:27:40 +07:00
|
|
|
GEM_BUG_ON(drm_mm_node_allocated(node));
|
2017-01-11 18:23:10 +07:00
|
|
|
|
|
|
|
if (unlikely(range_overflows(start, size, end)))
|
|
|
|
return -ENOSPC;
|
|
|
|
|
|
|
|
if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
|
|
|
|
return -ENOSPC;
|
|
|
|
|
2017-02-03 04:04:38 +07:00
|
|
|
mode = DRM_MM_INSERT_BEST;
|
|
|
|
if (flags & PIN_HIGH)
|
2018-05-21 15:21:30 +07:00
|
|
|
mode = DRM_MM_INSERT_HIGHEST;
|
2017-02-03 04:04:38 +07:00
|
|
|
if (flags & PIN_MAPPABLE)
|
|
|
|
mode = DRM_MM_INSERT_LOW;
|
2017-01-11 18:23:10 +07:00
|
|
|
|
|
|
|
/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
|
|
|
|
* so we know that we always have a minimum alignment of 4096.
|
|
|
|
* The drm_mm range manager is optimised to return results
|
|
|
|
* with zero alignment, so where possible use the optimal
|
|
|
|
* path.
|
|
|
|
*/
|
|
|
|
BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
|
|
|
|
if (alignment <= I915_GTT_MIN_ALIGNMENT)
|
|
|
|
alignment = 0;
|
|
|
|
|
2017-02-03 04:04:38 +07:00
|
|
|
err = drm_mm_insert_node_in_range(&vm->mm, node,
|
|
|
|
size, alignment, color,
|
|
|
|
start, end, mode);
|
2017-01-11 18:23:10 +07:00
|
|
|
if (err != -ENOSPC)
|
|
|
|
return err;
|
|
|
|
|
2018-05-21 15:21:30 +07:00
|
|
|
if (mode & DRM_MM_INSERT_ONCE) {
|
|
|
|
err = drm_mm_insert_node_in_range(&vm->mm, node,
|
|
|
|
size, alignment, color,
|
|
|
|
start, end,
|
|
|
|
DRM_MM_INSERT_BEST);
|
|
|
|
if (err != -ENOSPC)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2017-06-16 21:05:21 +07:00
|
|
|
if (flags & PIN_NOEVICT)
|
|
|
|
return -ENOSPC;
|
|
|
|
|
2019-08-21 19:32:34 +07:00
|
|
|
/*
|
|
|
|
* No free space, pick a slot at random.
|
2017-01-11 18:23:12 +07:00
|
|
|
*
|
|
|
|
* There is a pathological case here using a GTT shared between
|
|
|
|
* mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
|
|
|
|
*
|
|
|
|
* |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
|
|
|
|
* (64k objects) (448k objects)
|
|
|
|
*
|
|
|
|
* Now imagine that the eviction LRU is ordered top-down (just because
|
|
|
|
* pathology meets real life), and that we need to evict an object to
|
|
|
|
* make room inside the aperture. The eviction scan then has to walk
|
|
|
|
* the 448k list before it finds one within range. And now imagine that
|
|
|
|
* it has to search for a new hole between every byte inside the memcpy,
|
|
|
|
* for several simultaneous clients.
|
|
|
|
*
|
|
|
|
* On a full-ppgtt system, if we have run out of available space, there
|
|
|
|
* will be lots and lots of objects in the eviction list! Again,
|
|
|
|
* searching that LRU list may be slow if we are also applying any
|
|
|
|
* range restrictions (e.g. restriction to low 4GiB) and so, for
|
|
|
|
* simplicity and similarilty between different GTT, try the single
|
|
|
|
* random replacement first.
|
|
|
|
*/
|
|
|
|
offset = random_offset(start, end,
|
|
|
|
size, alignment ?: I915_GTT_MIN_ALIGNMENT);
|
|
|
|
err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
|
|
|
|
if (err != -ENOSPC)
|
|
|
|
return err;
|
|
|
|
|
2019-08-21 19:32:34 +07:00
|
|
|
if (flags & PIN_NOSEARCH)
|
|
|
|
return -ENOSPC;
|
|
|
|
|
2017-01-11 18:23:12 +07:00
|
|
|
/* Randomly selected placement is pinned, do a search */
|
2017-01-11 18:23:10 +07:00
|
|
|
err = i915_gem_evict_something(vm, size, alignment, color,
|
|
|
|
start, end, flags);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2017-02-03 04:04:38 +07:00
|
|
|
return drm_mm_insert_node_in_range(&vm->mm, node,
|
|
|
|
size, alignment, color,
|
|
|
|
start, end, DRM_MM_INSERT_EVICT);
|
2017-01-11 18:23:10 +07:00
|
|
|
}
|
2017-02-14 00:15:18 +07:00
|
|
|
|
|
|
|
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
|
|
|
#include "selftests/mock_gtt.c"
|
2017-02-14 00:15:38 +07:00
|
|
|
#include "selftests/i915_gem_gtt.c"
|
2017-02-14 00:15:18 +07:00
|
|
|
#endif
|