From a1871936c0f4f10850a5671c5deb1a92fcca1155 Mon Sep 17 00:00:00 2001 From: Luca Tettamanti Date: Wed, 3 Oct 2012 15:00:19 +0200 Subject: [PATCH 01/11] drm/radeon: use %zu for formatting size_t Fixes compiler warnings on 32bit. Signed-off-by: Luca Tettamanti Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c index b0a5688c67f8..ecb7e3374a56 100644 --- a/drivers/gpu/drm/radeon/radeon_acpi.c +++ b/drivers/gpu/drm/radeon/radeon_acpi.c @@ -201,7 +201,7 @@ static int radeon_atif_verify_interface(acpi_handle handle, size = *(u16 *) info->buffer.pointer; if (size < 12) { - DRM_INFO("ATIF buffer is too small: %lu\n", size); + DRM_INFO("ATIF buffer is too small: %zu\n", size); err = -EINVAL; goto out; } @@ -485,7 +485,7 @@ static int radeon_atcs_verify_interface(acpi_handle handle, size = *(u16 *) info->buffer.pointer; if (size < 8) { - DRM_INFO("ATCS buffer is too small: %lu\n", size); + DRM_INFO("ATCS buffer is too small: %zu\n", size); err = -EINVAL; goto out; } From cd23492af3d4401c02c48a4bebe5995c9498eac5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 4 Oct 2012 09:10:10 -0400 Subject: [PATCH 02/11] drm/radeon: fix compilation with backlight disabled Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_acpi.c | 2 + .../gpu/drm/radeon/radeon_legacy_encoders.c | 42 +++++++++---------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c index ecb7e3374a56..196d28d99570 100644 --- a/drivers/gpu/drm/radeon/radeon_acpi.c +++ b/drivers/gpu/drm/radeon/radeon_acpi.c @@ -370,6 +370,7 @@ int radeon_atif_handler(struct radeon_device *rdev, radeon_set_backlight_level(rdev, enc, req.backlight_level); +#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) if (rdev->is_atom_bios) { struct radeon_encoder_atom_dig *dig = enc->enc_priv; backlight_force_update(dig->bl_dev, @@ -379,6 +380,7 @@ int radeon_atif_handler(struct radeon_device *rdev, backlight_force_update(dig->bl_dev, BACKLIGHT_UPDATE_HOTKEY); } +#endif } } /* TODO: check other events */ diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c index 92487e614778..fb15b62ff984 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c @@ -269,27 +269,6 @@ static const struct drm_encoder_helper_funcs radeon_legacy_lvds_helper_funcs = { .disable = radeon_legacy_encoder_disable, }; -#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) - -static uint8_t radeon_legacy_lvds_level(struct backlight_device *bd) -{ - struct radeon_backlight_privdata *pdata = bl_get_data(bd); - uint8_t level; - - /* Convert brightness to hardware level */ - if (bd->props.brightness < 0) - level = 0; - else if (bd->props.brightness > RADEON_MAX_BL_LEVEL) - level = RADEON_MAX_BL_LEVEL; - else - level = bd->props.brightness; - - if (pdata->negative) - level = RADEON_MAX_BL_LEVEL - level; - - return level; -} - u8 radeon_legacy_get_backlight_level(struct radeon_encoder *radeon_encoder) { @@ -331,6 +310,27 @@ radeon_legacy_set_backlight_level(struct radeon_encoder *radeon_encoder, u8 leve radeon_legacy_lvds_update(&radeon_encoder->base, dpms_mode); } +#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) + +static uint8_t radeon_legacy_lvds_level(struct backlight_device *bd) +{ + struct radeon_backlight_privdata *pdata = bl_get_data(bd); + uint8_t level; + + /* Convert brightness to hardware level */ + if (bd->props.brightness < 0) + level = 0; + else if (bd->props.brightness > RADEON_MAX_BL_LEVEL) + level = RADEON_MAX_BL_LEVEL; + else + level = bd->props.brightness; + + if (pdata->negative) + level = RADEON_MAX_BL_LEVEL - level; + + return level; +} + static int radeon_legacy_backlight_update_status(struct backlight_device *bd) { struct radeon_backlight_privdata *pdata = bl_get_data(bd); From 29dbe3bcd2e28e71823febdca989d63d5c27d152 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 5 Oct 2012 10:22:02 -0400 Subject: [PATCH 03/11] drm/radeon: allocate PPLLs from low to high The order shouldn't matter, but there have been problems reported on certain older asics. This behaves more like the original code before the PPLL allocation rework. Signed-off-by: Alex Deucher Cc: Markus Trippelsdorf --- drivers/gpu/drm/radeon/atombios_crtc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 96184d02c8d9..2e566e123e9e 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1690,10 +1690,10 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) } /* all other cases */ pll_in_use = radeon_get_pll_use_mask(crtc); - if (!(pll_in_use & (1 << ATOM_PPLL2))) - return ATOM_PPLL2; if (!(pll_in_use & (1 << ATOM_PPLL1))) return ATOM_PPLL1; + if (!(pll_in_use & (1 << ATOM_PPLL2))) + return ATOM_PPLL2; DRM_ERROR("unable to allocate a PPLL\n"); return ATOM_PPLL_INVALID; } else { @@ -1715,10 +1715,10 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) } /* all other cases */ pll_in_use = radeon_get_pll_use_mask(crtc); - if (!(pll_in_use & (1 << ATOM_PPLL2))) - return ATOM_PPLL2; if (!(pll_in_use & (1 << ATOM_PPLL1))) return ATOM_PPLL1; + if (!(pll_in_use & (1 << ATOM_PPLL2))) + return ATOM_PPLL2; DRM_ERROR("unable to allocate a PPLL\n"); return ATOM_PPLL_INVALID; } else { From 23d4f1f246a98dde1dc485e0be9a647126630347 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 8 Oct 2012 09:45:46 -0400 Subject: [PATCH 04/11] drm/radeon: update comments to clarify VM setup (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The actual set up and assignment of VM page tables is done on the fly in radeon_gart.c. v2: update vm size comments Signed-off-by: Alex Deucher Reviewed-by: Christian König --- drivers/gpu/drm/radeon/ni.c | 4 ++++ drivers/gpu/drm/radeon/radeon_device.c | 4 ++++ drivers/gpu/drm/radeon/si.c | 7 ++++--- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 8bcb554ea0c5..83dc0852d5c9 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -770,6 +770,10 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev) WREG32(0x15DC, 0); /* empty context1-7 */ + /* Assign the pt base to something valid for now; the pts used for + * the VMs are determined by the application and setup and assigned + * on the fly in the vm part of radeon_gart.c + */ for (i = 1; i < 8; i++) { WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0); WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), 0); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 64a42647f08a..bd13ca09eb62 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1018,6 +1018,10 @@ int radeon_device_init(struct radeon_device *rdev, return r; /* initialize vm here */ mutex_init(&rdev->vm_manager.lock); + /* Adjust VM size here. + * Currently set to 4GB ((1 << 20) 4k pages). + * Max GPUVM size for cayman and SI is 40 bits. + */ rdev->vm_manager.max_pfn = 1 << 20; INIT_LIST_HEAD(&rdev->vm_manager.lru_vm); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index f79633a036c3..df8dd7701643 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2407,12 +2407,13 @@ static int si_pcie_gart_enable(struct radeon_device *rdev) WREG32(0x15DC, 0); /* empty context1-15 */ - /* FIXME start with 4G, once using 2 level pt switch to full - * vm size space - */ /* set vm size, must be a multiple of 4 */ WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn); + /* Assign the pt base to something valid for now; the pts used for + * the VMs are determined by the application and setup and assigned + * on the fly in the vm part of radeon_gart.c + */ for (i = 1; i < 16; i++) { if (i < 8) WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2), From 90a51a329258e3c868f6f4c1fb264ca01c590c57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 9 Oct 2012 13:31:17 +0200 Subject: [PATCH 05/11] drm/radeon: allocate page tables on demand v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on Dmitries work, but splitting the code into page directory and page table handling makes it far more readable and (hopefully) more reliable. Allocations of page tables are made from the SA on demand, that should still work fine since all page tables are of the same size. Also using the fact that allocations from the SA are mostly continuously (except for end of buffer wraps and under very high memory pressure) to group updates send to the chipset specific code into larger chunks. v3: mostly a rewrite of Dmitries previous patch. v4: fix some typos and coding style Signed-off-by: Dmitry Cherkasov Signed-off-by: Christian König Tested-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ni.c | 2 +- drivers/gpu/drm/radeon/radeon.h | 11 +- drivers/gpu/drm/radeon/radeon_gart.c | 322 +++++++++++++++++++++------ 3 files changed, 262 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 83dc0852d5c9..ab8d1f5fe68a 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1580,7 +1580,7 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, 0); radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0)); - radeon_ring_write(ring, vm->last_pfn); + radeon_ring_write(ring, rdev->vm_manager.max_pfn); radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0)); radeon_ring_write(ring, vm->pd_gpu_addr >> 12); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b04c06444d8b..bc6b56bf274a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -663,9 +663,14 @@ struct radeon_vm { struct list_head list; struct list_head va; unsigned id; - unsigned last_pfn; - u64 pd_gpu_addr; - struct radeon_sa_bo *sa_bo; + + /* contains the page directory */ + struct radeon_sa_bo *page_directory; + uint64_t pd_gpu_addr; + + /* array of page tables, one for each page directory entry */ + struct radeon_sa_bo **page_tables; + struct mutex mutex; /* last fence for cs using this vm */ struct radeon_fence *fence; diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index f0c06d196b75..98b170a0df90 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -422,6 +422,18 @@ void radeon_gart_fini(struct radeon_device *rdev) * TODO bind a default page at vm initialization for default address */ +/** + * radeon_vm_num_pde - return the number of page directory entries + * + * @rdev: radeon_device pointer + * + * Calculate the number of page directory entries (cayman+). + */ +static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) +{ + return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE; +} + /** * radeon_vm_directory_size - returns the size of the page directory in bytes * @@ -431,7 +443,7 @@ void radeon_gart_fini(struct radeon_device *rdev) */ static unsigned radeon_vm_directory_size(struct radeon_device *rdev) { - return (rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE) * 8; + return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); } /** @@ -451,11 +463,11 @@ int radeon_vm_manager_init(struct radeon_device *rdev) if (!rdev->vm_manager.enabled) { /* allocate enough for 2 full VM pts */ - size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev)); - size += RADEON_GPU_PAGE_ALIGN(rdev->vm_manager.max_pfn * 8); + size = radeon_vm_directory_size(rdev); + size += rdev->vm_manager.max_pfn * 8; size *= 2; r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, - size, + RADEON_GPU_PAGE_ALIGN(size), RADEON_GEM_DOMAIN_VRAM); if (r) { dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", @@ -476,7 +488,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev) /* restore page table */ list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { - if (vm->sa_bo == NULL) + if (vm->page_directory == NULL) continue; list_for_each_entry(bo_va, &vm->va, vm_list) { @@ -500,16 +512,25 @@ static void radeon_vm_free_pt(struct radeon_device *rdev, struct radeon_vm *vm) { struct radeon_bo_va *bo_va; + int i; - if (!vm->sa_bo) + if (!vm->page_directory) return; list_del_init(&vm->list); - radeon_sa_bo_free(rdev, &vm->sa_bo, vm->fence); + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); list_for_each_entry(bo_va, &vm->va, vm_list) { bo_va->valid = false; } + + if (vm->page_tables == NULL) + return; + + for (i = 0; i < radeon_vm_num_pdes(rdev); i++) + radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence); + + kfree(vm->page_tables); } /** @@ -545,6 +566,35 @@ void radeon_vm_manager_fini(struct radeon_device *rdev) rdev->vm_manager.enabled = false; } +/** + * radeon_vm_evict - evict page table to make room for new one + * + * @rdev: radeon_device pointer + * @vm: VM we want to allocate something for + * + * Evict a VM from the lru, making sure that it isn't @vm. (cayman+). + * Returns 0 for success, -ENOMEM for failure. + * + * Global and local mutex must be locked! + */ +int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) +{ + struct radeon_vm *vm_evict; + + if (list_empty(&rdev->vm_manager.lru_vm)) + return -ENOMEM; + + vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, + struct radeon_vm, list); + if (vm_evict == vm) + return -ENOMEM; + + mutex_lock(&vm_evict->mutex); + radeon_vm_free_pt(rdev, vm_evict); + mutex_unlock(&vm_evict->mutex); + return 0; +} + /** * radeon_vm_alloc_pt - allocates a page table for a VM * @@ -559,20 +609,15 @@ void radeon_vm_manager_fini(struct radeon_device *rdev) */ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) { - struct radeon_vm *vm_evict; - int r; + unsigned pd_size, pts_size; u64 *pd_addr; - int tables_size; + int r; if (vm == NULL) { return -EINVAL; } - /* allocate enough to cover the current VM size */ - tables_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev)); - tables_size += RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8); - - if (vm->sa_bo != NULL) { + if (vm->page_directory != NULL) { /* update lru */ list_del_init(&vm->list); list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); @@ -580,25 +625,34 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) } retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo, - tables_size, RADEON_GPU_PAGE_SIZE, false); + pd_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev)); + r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, + &vm->page_directory, pd_size, + RADEON_GPU_PAGE_SIZE, false); if (r == -ENOMEM) { - if (list_empty(&rdev->vm_manager.lru_vm)) { + r = radeon_vm_evict(rdev, vm); + if (r) return r; - } - vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list); - mutex_lock(&vm_evict->mutex); - radeon_vm_free_pt(rdev, vm_evict); - mutex_unlock(&vm_evict->mutex); goto retry; } else if (r) { return r; } - pd_addr = radeon_sa_bo_cpu_addr(vm->sa_bo); - vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo); - memset(pd_addr, 0, tables_size); + vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory); + + /* Initially clear the page directory */ + pd_addr = radeon_sa_bo_cpu_addr(vm->page_directory); + memset(pd_addr, 0, pd_size); + + pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); + vm->page_tables = kzalloc(pts_size, GFP_KERNEL); + + if (vm->page_tables == NULL) { + DRM_ERROR("Cannot allocate memory for page table array\n"); + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); + return -ENOMEM; + } list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo, @@ -793,20 +847,6 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, } mutex_lock(&vm->mutex); - if (last_pfn > vm->last_pfn) { - /* release mutex and lock in right order */ - mutex_unlock(&vm->mutex); - mutex_lock(&rdev->vm_manager.lock); - mutex_lock(&vm->mutex); - /* and check again */ - if (last_pfn > vm->last_pfn) { - /* grow va space 32M by 32M */ - unsigned align = ((32 << 20) >> 12) - 1; - radeon_vm_free_pt(rdev, vm); - vm->last_pfn = (last_pfn + align) & ~align; - } - mutex_unlock(&rdev->vm_manager.lock); - } head = &vm->va; last_offset = 0; list_for_each_entry(tmp, &vm->va, vm_list) { @@ -864,6 +904,155 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) return result; } +/** + * radeon_vm_update_pdes - make sure that page directory is valid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * + * Allocates new page tables if necessary + * and updates the page directory (cayman+). + * Returns 0 for success, error for failure. + * + * Global and local mutex must be locked! + */ +static int radeon_vm_update_pdes(struct radeon_device *rdev, + struct radeon_vm *vm, + uint64_t start, uint64_t end) +{ + static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; + + uint64_t last_pde = ~0, last_pt = ~0; + unsigned count = 0; + uint64_t pt_idx; + int r; + + start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + + /* walk over the address space and update the page directory */ + for (pt_idx = start; pt_idx <= end; ++pt_idx) { + uint64_t pde, pt; + + if (vm->page_tables[pt_idx]) + continue; + +retry: + r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, + &vm->page_tables[pt_idx], + RADEON_VM_PTE_COUNT * 8, + RADEON_GPU_PAGE_SIZE, false); + + if (r == -ENOMEM) { + r = radeon_vm_evict(rdev, vm); + if (r) + return r; + goto retry; + } else if (r) { + return r; + } + + pde = vm->pd_gpu_addr + pt_idx * 8; + + pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); + + if (((last_pde + 8 * count) != pde) || + ((last_pt + incr * count) != pt)) { + + if (count) { + radeon_asic_vm_set_page(rdev, last_pde, + last_pt, count, incr, + RADEON_VM_PAGE_VALID); + } + + count = 1; + last_pde = pde; + last_pt = pt; + } else { + ++count; + } + } + + if (count) { + radeon_asic_vm_set_page(rdev, last_pde, last_pt, count, + incr, RADEON_VM_PAGE_VALID); + + } + + return 0; +} + +/** + * radeon_vm_update_ptes - make sure that page tables are valid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * @dst: destination address to map to + * @flags: mapping flags + * + * Update the page tables in the range @start - @end (cayman+). + * + * Global and local mutex must be locked! + */ +static void radeon_vm_update_ptes(struct radeon_device *rdev, + struct radeon_vm *vm, + uint64_t start, uint64_t end, + uint64_t dst, uint32_t flags) +{ + static const uint64_t mask = RADEON_VM_PTE_COUNT - 1; + + uint64_t last_pte = ~0, last_dst = ~0; + unsigned count = 0; + uint64_t addr; + + start = start / RADEON_GPU_PAGE_SIZE; + end = end / RADEON_GPU_PAGE_SIZE; + + /* walk over the address space and update the page tables */ + for (addr = start; addr < end; ) { + uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; + unsigned nptes; + uint64_t pte; + + if ((addr & ~mask) == (end & ~mask)) + nptes = end - addr; + else + nptes = RADEON_VM_PTE_COUNT - (addr & mask); + + pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); + pte += (addr & mask) * 8; + + if (((last_pte + 8 * count) != pte) || + ((count + nptes) > 1 << 11)) { + + if (count) { + radeon_asic_vm_set_page(rdev, last_pte, + last_dst, count, + RADEON_GPU_PAGE_SIZE, + flags); + } + + count = nptes; + last_pte = pte; + last_dst = dst; + } else { + count += nptes; + } + + addr += nptes; + dst += nptes * RADEON_GPU_PAGE_SIZE; + } + + if (count) { + radeon_asic_vm_set_page(rdev, last_pte, last_dst, count, + RADEON_GPU_PAGE_SIZE, flags); + } +} + /** * radeon_vm_bo_update_pte - map a bo into the vm page table * @@ -887,12 +1076,11 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, struct radeon_semaphore *sem = NULL; struct radeon_bo_va *bo_va; unsigned nptes, npdes, ndw; - uint64_t pe, addr; - uint64_t pfn; + uint64_t addr; int r; /* nothing to do if vm isn't bound */ - if (vm->sa_bo == NULL) + if (vm->page_directory == NULL) return 0; bo_va = radeon_vm_bo_find(vm, bo); @@ -939,25 +1127,29 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, } } - /* estimate number of dw needed */ - /* reserve space for 32-bit padding */ - ndw = 32; - nptes = radeon_bo_ngpu_pages(bo); - pfn = (bo_va->soffset / RADEON_GPU_PAGE_SIZE); + /* assume two extra pdes in case the mapping overlaps the borders */ + npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; - /* handle cases where a bo spans several pdes */ - npdes = (ALIGN(pfn + nptes, RADEON_VM_PTE_COUNT) - - (pfn & ~(RADEON_VM_PTE_COUNT - 1))) >> RADEON_VM_BLOCK_SIZE; + /* estimate number of dw needed */ + /* semaphore, fence and padding */ + ndw = 32; + + if (RADEON_VM_BLOCK_SIZE > 11) + /* reserve space for one header for every 2k dwords */ + ndw += (nptes >> 11) * 3; + else + /* reserve space for one header for + every (1 << BLOCK_SIZE) entries */ + ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 3; - /* reserve space for one header for every 2k dwords */ - ndw += (nptes >> 11) * 3; /* reserve space for pte addresses */ ndw += nptes * 2; /* reserve space for one header for every 2k dwords */ ndw += (npdes >> 11) * 3; + /* reserve space for pde addresses */ ndw += npdes * 2; @@ -971,22 +1163,14 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, radeon_fence_note_sync(vm->fence, ridx); } - /* update page table entries */ - pe = vm->pd_gpu_addr; - pe += radeon_vm_directory_size(rdev); - pe += (bo_va->soffset / RADEON_GPU_PAGE_SIZE) * 8; + r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } - radeon_asic_vm_set_page(rdev, pe, addr, nptes, - RADEON_GPU_PAGE_SIZE, bo_va->flags); - - /* update page directory entries */ - addr = pe; - - pe = vm->pd_gpu_addr; - pe += ((bo_va->soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE) * 8; - - radeon_asic_vm_set_page(rdev, pe, addr, npdes, - RADEON_VM_PTE_COUNT * 8, RADEON_VM_PAGE_VALID); + radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset, + addr, bo_va->flags); radeon_fence_unref(&vm->fence); r = radeon_fence_emit(rdev, &vm->fence, ridx); @@ -997,6 +1181,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, radeon_ring_unlock_commit(rdev, ring); radeon_semaphore_free(rdev, &sem, vm->fence); radeon_fence_unref(&vm->last_flush); + return 0; } @@ -1068,7 +1253,6 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) vm->id = 0; vm->fence = NULL; - vm->last_pfn = 0; mutex_init(&vm->mutex); INIT_LIST_HEAD(&vm->list); INIT_LIST_HEAD(&vm->va); From d72d43cfc5847c176edabc72e6431ba691322c98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 9 Oct 2012 13:31:18 +0200 Subject: [PATCH 06/11] drm/radeon: don't add the IB pool to all VMs v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to use VMs without the IB pool in the future. v2: also remove it from radeon_vm_finish. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_gart.c | 34 +++------------------------- drivers/gpu/drm/radeon/radeon_kms.c | 22 +++++++++++++++++- 3 files changed, 25 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index bc6b56bf274a..54cf9b594ca6 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1848,7 +1848,7 @@ extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size */ int radeon_vm_manager_init(struct radeon_device *rdev); void radeon_vm_manager_fini(struct radeon_device *rdev); -int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); +void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm); struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 98b170a0df90..7e21ea32242a 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -602,7 +602,6 @@ int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) * @vm: vm to bind * * Allocate a page table for the requested vm (cayman+). - * Also starts to populate the page table. * Returns 0 for success, error for failure. * * Global and local mutex must be locked! @@ -655,8 +654,7 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) } list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); - return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo, - &rdev->ring_tmp_bo.bo->tbo.mem); + return 0; } /** @@ -1241,30 +1239,15 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, * @rdev: radeon_device pointer * @vm: requested vm * - * Init @vm (cayman+). - * Map the IB pool and any other shared objects into the VM - * by default as it's used by all VMs. - * Returns 0 for success, error for failure. + * Init @vm fields (cayman+). */ -int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) +void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) { - struct radeon_bo_va *bo_va; - int r; - vm->id = 0; vm->fence = NULL; mutex_init(&vm->mutex); INIT_LIST_HEAD(&vm->list); INIT_LIST_HEAD(&vm->va); - - /* map the ib pool buffer at 0 in virtual address space, set - * read only - */ - bo_va = radeon_vm_bo_add(rdev, vm, rdev->ring_tmp_bo.bo); - r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET, - RADEON_VM_PAGE_READABLE | - RADEON_VM_PAGE_SNOOPED); - return r; } /** @@ -1286,17 +1269,6 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) radeon_vm_free_pt(rdev, vm); mutex_unlock(&rdev->vm_manager.lock); - /* remove all bo at this point non are busy any more because unbind - * waited for the last vm fence to signal - */ - r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); - if (!r) { - bo_va = radeon_vm_bo_find(vm, rdev->ring_tmp_bo.bo); - list_del_init(&bo_va->bo_list); - list_del_init(&bo_va->vm_list); - radeon_bo_unreserve(rdev->ring_tmp_bo.bo); - kfree(bo_va); - } if (!list_empty(&vm->va)) { dev_err(rdev->dev, "still active bo inside vm\n"); } diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 83b8d8aa71c0..dc781c49b96b 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -419,6 +419,7 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) /* new gpu have virtual address space support */ if (rdev->family >= CHIP_CAYMAN) { struct radeon_fpriv *fpriv; + struct radeon_bo_va *bo_va; int r; fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); @@ -426,7 +427,15 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) return -ENOMEM; } - r = radeon_vm_init(rdev, &fpriv->vm); + radeon_vm_init(rdev, &fpriv->vm); + + /* map the ib pool buffer read only into + * virtual address space */ + bo_va = radeon_vm_bo_add(rdev, &fpriv->vm, + rdev->ring_tmp_bo.bo); + r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET, + RADEON_VM_PAGE_READABLE | + RADEON_VM_PAGE_SNOOPED); if (r) { radeon_vm_fini(rdev, &fpriv->vm); kfree(fpriv); @@ -454,6 +463,17 @@ void radeon_driver_postclose_kms(struct drm_device *dev, /* new gpu have virtual address space support */ if (rdev->family >= CHIP_CAYMAN && file_priv->driver_priv) { struct radeon_fpriv *fpriv = file_priv->driver_priv; + struct radeon_bo_va *bo_va; + int r; + + r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); + if (!r) { + bo_va = radeon_vm_bo_find(&fpriv->vm, + rdev->ring_tmp_bo.bo); + if (bo_va) + radeon_vm_bo_rmv(rdev, bo_va); + radeon_bo_unreserve(rdev->ring_tmp_bo.bo); + } radeon_vm_fini(rdev, &fpriv->vm); kfree(fpriv); From 13e55c38f8ba4bb15ff9b51e2c5e7801c0f29526 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 9 Oct 2012 13:31:19 +0200 Subject: [PATCH 07/11] drm/radeon: separate pt alloc from lru add MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it possible to allocate a persistent page table. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_cs.c | 1 + drivers/gpu/drm/radeon/radeon_gart.c | 20 ++++++++++++++++---- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 54cf9b594ca6..8c42d54c2e26 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1851,6 +1851,7 @@ void radeon_vm_manager_fini(struct radeon_device *rdev); void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm); +void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm); struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring); void radeon_vm_fence(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index cb7b7c062fef..41672cc563fb 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -478,6 +478,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, } out: + radeon_vm_add_to_lru(rdev, vm); mutex_unlock(&vm->mutex); mutex_unlock(&rdev->vm_manager.lock); return r; diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 7e21ea32242a..a7677dd1ce98 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -617,9 +617,6 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) } if (vm->page_directory != NULL) { - /* update lru */ - list_del_init(&vm->list); - list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); return 0; } @@ -653,10 +650,25 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) return -ENOMEM; } - list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); return 0; } +/** + * radeon_vm_add_to_lru - add VMs page table to LRU list + * + * @rdev: radeon_device pointer + * @vm: vm to add to LRU + * + * Add the allocated page table to the LRU list (cayman+). + * + * Global mutex must be locked! + */ +void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) +{ + list_del_init(&vm->list); + list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); +} + /** * radeon_vm_grab_id - allocate the next free VMID * From c1a7ca0de38c23a15f652b1693afd56c9f07b16c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 8 Oct 2012 12:15:13 -0400 Subject: [PATCH 08/11] drm/radeon/cayman: set VM max pfn at MC init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to emit them at VM flush as we no longer use variable sized page tables now that we support 2 level page tables. This matches the behavior of SI (which does not support variable sized page tables). Signed-off-by: Alex Deucher Reviewed-by: Christian König --- drivers/gpu/drm/radeon/ni.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index ab8d1f5fe68a..8c74c729586d 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -776,7 +776,7 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev) */ for (i = 1; i < 8; i++) { WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), 0); + WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2), rdev->gart.table_addr >> 12); } @@ -1576,12 +1576,6 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) if (vm == NULL) return; - radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (vm->id << 2), 0)); - radeon_ring_write(ring, 0); - - radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0)); - radeon_ring_write(ring, rdev->vm_manager.max_pfn); - radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0)); radeon_ring_write(ring, vm->pd_gpu_addr >> 12); From 3691feea9826771d853d28d37b6b6e34758fa66d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 8 Oct 2012 17:46:27 -0400 Subject: [PATCH 09/11] drm/radeon: check if pcie gen 2 is already enabled (v2) If so, skip enabling it to save time. v2: coding style fixes Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen.c | 7 ++++++- drivers/gpu/drm/radeon/r600.c | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index a1f49c5fd74b..14313ad43b76 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3431,9 +3431,14 @@ void evergreen_pcie_gen2_enable(struct radeon_device *rdev) if (!(mask & DRM_PCIE_SPEED_50)) return; + speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); + if (speed_cntl & LC_CURRENT_DATA_RATE) { + DRM_INFO("PCIE gen 2 link speeds already enabled\n"); + return; + } + DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n"); - speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); if ((speed_cntl & LC_OTHER_SIDE_EVER_SENT_GEN2) || (speed_cntl & LC_OTHER_SIDE_SUPPORTS_GEN2)) { diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 70c800ff6190..cda280d157da 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3703,6 +3703,12 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev) if (!(mask & DRM_PCIE_SPEED_50)) return; + speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); + if (speed_cntl & LC_CURRENT_DATA_RATE) { + DRM_INFO("PCIE gen 2 link speeds already enabled\n"); + return; + } + DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n"); /* 55 nm r6xx asics */ From 082918471139b07964967cfe5f70230909c82ae1 Mon Sep 17 00:00:00 2001 From: Egbert Eich Date: Mon, 15 Oct 2012 08:21:39 +0200 Subject: [PATCH 10/11] drm/radeon: Don't destroy I2C Bus Rec in radeon_ext_tmds_enc_destroy(). radeon_i2c_fini() walks thru the list of I2C bus recs rdev->i2c_bus[] to destroy each of them. radeon_ext_tmds_enc_destroy() however also has code to destroy it's associated I2C bus rec which has been obtained by radeon_i2c_lookup() and is therefore also in the i2c_bus[] list. This causes a double free resulting in a kernel panic when unloading the radeon driver. Removing destroy code from radeon_ext_tmds_enc_destroy() fixes this problem. agd5f: fix compiler warning Signed-off-by: Egbert Eich Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_legacy_encoders.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c index fb15b62ff984..a13ad9d707cf 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c @@ -991,11 +991,7 @@ static void radeon_legacy_tmds_ext_mode_set(struct drm_encoder *encoder, static void radeon_ext_tmds_enc_destroy(struct drm_encoder *encoder) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); - struct radeon_encoder_ext_tmds *tmds = radeon_encoder->enc_priv; - if (tmds) { - if (tmds->i2c_bus) - radeon_i2c_destroy(tmds->i2c_bus); - } + /* don't destroy the i2c bus record here, this will be done in radeon_i2c_fini */ kfree(radeon_encoder->enc_priv); drm_encoder_cleanup(encoder); kfree(radeon_encoder); From 8ad33cdf97639e2a1601b6dd8629d351c1c50198 Mon Sep 17 00:00:00 2001 From: Thomas Friebel Date: Mon, 15 Oct 2012 13:16:22 -0400 Subject: [PATCH 11/11] drm/radeon: fix spelling typos in debugging output Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index bba66902c83b..47634f27f2e5 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -305,7 +305,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v) { #if DRM_DEBUG_CODE if (ring->count_dw <= 0) { - DRM_ERROR("radeon: writting more dword to ring than expected !\n"); + DRM_ERROR("radeon: writing more dwords to the ring than expected!\n"); } #endif ring->ring[ring->wptr++] = v;