linux_dsm_epyc7002/drivers/gpu/drm/i915/intel_pm.c

4904 lines
136 KiB
C
Raw Normal View History

/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eugeni Dodonov <eugeni.dodonov@intel.com>
*
*/
#include <linux/cpufreq.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "../../../platform/x86/intel_ips.h"
#include <linux/module.h>
#define FORCEWAKE_ACK_TIMEOUT_MS 2
/* FBC, or Frame Buffer Compression, is a technique employed to compress the
* framebuffer contents in-memory, aiming at reducing the required bandwidth
* during in-memory transfers and, therefore, reducing power consumption.
*
* The benefits of FBC are mostly visible with solid backgrounds and
* variation-less patterns.
*
* FBC-related functionality can be enabled by the means of the
* i915.i915_enable_fbc parameter
*/
static bool intel_crtc_active(struct drm_crtc *crtc)
{
/* Be paranoid as we can arrive here with only partial
* state retrieved from the hardware during setup.
*/
return to_intel_crtc(crtc)->active && crtc->fb && crtc->mode.clock;
}
/*
 * Turn off frame buffer compression through FBC_CONTROL and wait for the
 * compressor to report idle. Does nothing if FBC is already disabled.
 */
static void i8xx_disable_fbc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 ctl = I915_READ(FBC_CONTROL);

	/* Already off: leave the register alone */
	if (!(ctl & FBC_CTL_EN))
		return;

	ctl &= ~FBC_CTL_EN;
	I915_WRITE(FBC_CONTROL, ctl);

	/* Poll until the "compressing" status bit drops, or give up */
	if (wait_for((I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) == 0, 10)) {
		DRM_DEBUG_KMS("FBC idle timed out\n");
		return;
	}

	DRM_DEBUG_KMS("disabled FBC\n");
}
/*
 * Program and enable frame buffer compression via the FBC_CONTROL register
 * family for the plane driven by @crtc.
 *
 * @crtc:     CRTC whose framebuffer is to be compressed
 * @interval: value placed in the interval field of FBC_CONTROL (masked
 *            before being shifted into place)
 *
 * The compressed-buffer pitch is derived from dev_priv->cfb_size and clamped
 * to the framebuffer pitch, then converted into the 64-byte units the
 * control register expects.
 */
static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
{
	struct drm_device *dev = crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_framebuffer *fb = crtc->fb;
	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
	struct drm_i915_gem_object *obj = intel_fb->obj;
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	int cfb_pitch;
	int plane, i;
	u32 fbc_ctl, fbc_ctl2;

	cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE;
	if (fb->pitches[0] < cfb_pitch)
		cfb_pitch = fb->pitches[0];

	/* FBC_CTL wants 64B units */
	cfb_pitch = (cfb_pitch / 64) - 1;
	plane = intel_crtc->plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB;

	/* Clear old tags */
	for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
		I915_WRITE(FBC_TAG + (i * 4), 0);

	/* Set it up... */
	fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE;
	fbc_ctl2 |= plane;
	I915_WRITE(FBC_CONTROL2, fbc_ctl2);
	I915_WRITE(FBC_FENCE_OFF, crtc->y);

	/* enable it... */
	fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC;
	if (IS_I945GM(dev))
		fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */
	fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
	/* NOTE(review): 0x2fff is not a contiguous bit mask (bit 12 is
	 * excluded) — confirm the interval field width against the register
	 * definition.
	 */
	fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT;
	fbc_ctl |= obj->fence_reg;
	I915_WRITE(FBC_CONTROL, fbc_ctl);

	/* Terminate the message with a newline so the log line is complete
	 * and does not run into whatever is printed next.
	 */
	DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %c\n",
		      cfb_pitch, crtc->y, plane_name(intel_crtc->plane));
}
static bool i8xx_fbc_enabled(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
return I915_READ(FBC_CONTROL) & FBC_CTL_EN;
}
/*
 * Program and enable frame buffer compression through the DPFC register
 * block for the plane driven by @crtc.
 *
 * @crtc:     CRTC whose framebuffer is to be compressed
 * @interval: value shifted into the recompression timer count field of
 *            DPFC_RECOMP_CTL
 */
static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
{
	struct drm_device *dev = crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_framebuffer *fb = crtc->fb;
	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
	struct drm_i915_gem_object *obj = intel_fb->obj;
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	int plane = intel_crtc->plane == 0 ? DPFC_CTL_PLANEA : DPFC_CTL_PLANEB;
	unsigned long stall_watermark = 200;
	u32 dpfc_ctl;

	/* Plane select, self-refresh, 1x limit and the fence for this fb */
	dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X;
	dpfc_ctl |= DPFC_CTL_FENCE_EN | obj->fence_reg;
	I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY);

	I915_WRITE(DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN |
		   (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
		   (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT));
	I915_WRITE(DPFC_FENCE_YOFF, crtc->y);

	/*
	 * enable it... The control value assembled above must actually be
	 * written: OR-ing only the enable bit into the current register
	 * contents would drop the plane/fence/limit configuration.
	 */
	I915_WRITE(DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);

	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
}
/* Clear the enable bit in DPFC_CONTROL if compression is currently on. */
static void g4x_disable_fbc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 ctl = I915_READ(DPFC_CONTROL);

	/* Nothing to do when compression is not enabled */
	if (!(ctl & DPFC_CTL_EN))
		return;

	ctl &= ~DPFC_CTL_EN;
	I915_WRITE(DPFC_CONTROL, ctl);

	DRM_DEBUG_KMS("disabled FBC\n");
}
static bool g4x_fbc_enabled(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
}
/*
 * Set the FBC-notify bit in GEN6_BLITTER_ECOSKPD so that the blitter
 * reports its writes to the FBC logic.
 *
 * The register is updated in three writes while forcewake is held: first
 * the notify bit shifted by GEN6_BLITTER_LOCK_SHIFT is set, then the notify
 * bit itself, and finally the shifted bit is cleared again.
 * NOTE(review): the shifted bit presumably acts as a write-enable/lock for
 * the low bit — confirm against the register documentation.
 */
static void sandybridge_blit_fbc_update(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
u32 blt_ecoskpd;
/* Make sure blitter notifies FBC of writes */
gen6_gt_force_wake_get(dev_priv);
blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
/* Step 1: set the notify bit in the shifted (lock) position */
blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
GEN6_BLITTER_LOCK_SHIFT;
I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
/* Step 2: set the notify bit itself */
blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY;
I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
/* Step 3: clear the shifted bit again, leaving the notify bit set */
blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY <<
GEN6_BLITTER_LOCK_SHIFT);
I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
/* Read back before forcewake is released */
POSTING_READ(GEN6_BLITTER_ECOSKPD);
gen6_gt_force_wake_put(dev_priv);
}
/*
 * Program and enable frame buffer compression through the ILK_DPFC register
 * block for the plane driven by @crtc.
 *
 * @crtc:     CRTC whose framebuffer is to be compressed
 * @interval: value shifted into the recompression timer count field of
 *            ILK_DPFC_RECOMP_CTL
 *
 * On gen6 the CPU fence registers are programmed as well and the blitter is
 * told to notify FBC of its writes.
 */
static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
{
	struct drm_device *dev = crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
	struct drm_i915_gem_object *obj = to_intel_framebuffer(crtc->fb)->obj;
	unsigned long stall_watermark = 200;
	int plane;
	u32 ctl;

	if (intel_crtc->plane == 0)
		plane = DPFC_CTL_PLANEA;
	else
		plane = DPFC_CTL_PLANEB;

	/* Start from the reserved bits of the current control value */
	ctl = I915_READ(ILK_DPFC_CONTROL);
	ctl &= DPFC_RESERVED;
	ctl |= plane | DPFC_CTL_LIMIT_1X;
	/* Persistent mode is wanted for front-buffer rendering, ala X. */
	ctl |= DPFC_CTL_PERSISTENT_MODE;
	ctl |= DPFC_CTL_FENCE_EN | obj->fence_reg;

	I915_WRITE(ILK_DPFC_CHICKEN, DPFC_HT_MODIFY);
	I915_WRITE(ILK_DPFC_RECOMP_CTL,
		   DPFC_RECOMP_STALL_EN |
		   (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
		   (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT));
	I915_WRITE(ILK_DPFC_FENCE_YOFF, crtc->y);
	I915_WRITE(ILK_FBC_RT_BASE, obj->gtt_offset | ILK_FBC_RT_VALID);

	/* Everything is staged; set the enable bit */
	I915_WRITE(ILK_DPFC_CONTROL, ctl | DPFC_CTL_EN);

	if (IS_GEN6(dev)) {
		I915_WRITE(SNB_DPFC_CTL_SA,
			   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
		I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
		sandybridge_blit_fbc_update(dev);
	}

	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
}
/* Clear the enable bit in ILK_DPFC_CONTROL if compression is currently on. */
static void ironlake_disable_fbc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 ctl = I915_READ(ILK_DPFC_CONTROL);

	/* Nothing to do when compression is not enabled */
	if (!(ctl & DPFC_CTL_EN))
		return;

	ctl &= ~DPFC_CTL_EN;
	I915_WRITE(ILK_DPFC_CONTROL, ctl);

	DRM_DEBUG_KMS("disabled FBC\n");
}
static bool ironlake_fbc_enabled(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
return I915_READ(ILK_DPFC_CONTROL) & DPFC_CTL_EN;
}
/*
 * Ask the platform's fbc_enabled hook whether FBC is currently on.
 * Returns false when no such hook is installed.
 */
bool intel_fbc_enabled(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (dev_priv->display.fbc_enabled)
		return dev_priv->display.fbc_enabled(dev);

	return false;
}
static void intel_fbc_work_fn(struct work_struct *__work)
{
struct intel_fbc_work *work =
container_of(to_delayed_work(__work),
struct intel_fbc_work, work);
struct drm_device *dev = work->crtc->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
mutex_lock(&dev->struct_mutex);
if (work == dev_priv->fbc_work) {
/* Double check that we haven't switched fb without cancelling
* the prior work.
*/
if (work->crtc->fb == work->fb) {
dev_priv->display.enable_fbc(work->crtc,
work->interval);
dev_priv->cfb_plane = to_intel_crtc(work->crtc)->plane;
dev_priv->cfb_fb = work->crtc->fb->base.id;
dev_priv->cfb_y = work->crtc->y;
}
dev_priv->fbc_work = NULL;
}
mutex_unlock(&dev->struct_mutex);
kfree(work);
}
static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv)
{
if (dev_priv->fbc_work == NULL)
return;
DRM_DEBUG_KMS("cancelling pending FBC enable\n");
/* Synchronisation is provided by struct_mutex and checking of
* dev_priv->fbc_work, so we can perform the cancellation
* entirely asynchronously.
*/
if (cancel_delayed_work(&dev_priv->fbc_work->work))
/* tasklet was killed before being run, clean up */
kfree(dev_priv->fbc_work);
/* Mark the work as no longer wanted so that if it does
* wake-up (because the work was already running and waiting
* for our mutex), it will discover that is no longer
* necessary to run.
*/
dev_priv->fbc_work = NULL;
}
/*
 * Schedule a deferred FBC enable for @crtc.
 *
 * @crtc:     CRTC to enable compression on
 * @interval: passed through unchanged to the platform enable_fbc hook
 *
 * Any previously pending enable is cancelled first. If the work item cannot
 * be allocated, the platform hook is called immediately instead. Does
 * nothing when the platform has no enable_fbc hook.
 */
void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
{
	struct drm_device *dev = crtc->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_fbc_work *deferred;

	if (!dev_priv->display.enable_fbc)
		return;

	intel_cancel_fbc_work(dev_priv);

	deferred = kzalloc(sizeof(*deferred), GFP_KERNEL);
	if (!deferred) {
		/* No memory for deferral: enable right away */
		dev_priv->display.enable_fbc(crtc, interval);
		return;
	}

	deferred->crtc = crtc;
	deferred->fb = crtc->fb;
	deferred->interval = interval;
	INIT_DELAYED_WORK(&deferred->work, intel_fbc_work_fn);

	dev_priv->fbc_work = deferred;

	DRM_DEBUG_KMS("scheduling delayed FBC enable\n");

	/* The enable is postponed so that page flipping can stop and the
	 * display can settle before compression starts. The same delay
	 * also lets a vblank elapse after FBC was disabled, before the
	 * control registers are touched again.
	 *
	 * Tracking vblanks after the end of a page-flip sequence and doing
	 * the enable as a co-routine would be the more elaborate
	 * alternative to waiting a fixed time.
	 */
	schedule_delayed_work(&deferred->work, msecs_to_jiffies(50));
}
/*
 * Cancel any pending deferred enable, then disable FBC through the platform
 * hook (if there is one) and mark no plane as being compressed.
 */
void intel_disable_fbc(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	intel_cancel_fbc_work(dev_priv);

	if (dev_priv->display.disable_fbc) {
		dev_priv->display.disable_fbc(dev);
		dev_priv->cfb_plane = -1;
	}
}
/**
* intel_update_fbc - enable/disable FBC as needed
* @dev: the drm_device
*
* Set up the framebuffer compression hardware at mode set time. We
* enable it if possible:
* - plane A only (on pre-965)
* - no pixel multiply/line duplication
* - no alpha buffer discard
* - no dual wide
* - framebuffer <= 2048 in width, 1536 in height
*
* We can't assume that any compression will take place (worst case),
* so the compressed buffer has to be the same size as the uncompressed
* one. It also must reside (along with the line length buffer) in
* stolen memory.
*
* We need to enable/disable FBC on a global basis.
*
* Every check that rules FBC out records a reason in
* dev_priv->no_fbc_reason (except the kernel-debugger check) and jumps to
* out_disable, which disables FBC if it is on and releases the compression
* buffer. The function returns without touching anything when powersave is
* off or the device has no FBC.
*/
void intel_update_fbc(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_crtc *crtc = NULL, *tmp_crtc;
struct intel_crtc *intel_crtc;
struct drm_framebuffer *fb;
struct intel_framebuffer *intel_fb;
struct drm_i915_gem_object *obj;
int enable_fbc;
if (!i915_powersave)
return;
if (!I915_HAS_FBC(dev))
return;
/*
* If FBC is already on, we just have to verify that we can
* keep it that way...
* Need to disable if:
* - more than one pipe is active
* - changing FBC params (stride, fence, mode)
* - new fb is too large to fit in compressed buffer
* - going to an unsupported config (interlace, pixel multiply, etc.)
*/
/* Find the single active CRTC whose primary plane is not disabled */
list_for_each_entry(tmp_crtc, &dev->mode_config.crtc_list, head) {
if (intel_crtc_active(tmp_crtc) &&
!to_intel_crtc(tmp_crtc)->primary_disabled) {
if (crtc) {
DRM_DEBUG_KMS("more than one pipe active, disabling compression\n");
dev_priv->no_fbc_reason = FBC_MULTIPLE_PIPES;
goto out_disable;
}
crtc = tmp_crtc;
}
}
if (!crtc || crtc->fb == NULL) {
DRM_DEBUG_KMS("no output, disabling\n");
dev_priv->no_fbc_reason = FBC_NO_OUTPUT;
goto out_disable;
}
intel_crtc = to_intel_crtc(crtc);
fb = crtc->fb;
intel_fb = to_intel_framebuffer(fb);
obj = intel_fb->obj;
/* A negative module parameter selects the per-chip default:
 * off for gen <= 6, on otherwise.
 */
enable_fbc = i915_enable_fbc;
if (enable_fbc < 0) {
DRM_DEBUG_KMS("fbc set to per-chip default\n");
enable_fbc = 1;
if (INTEL_INFO(dev)->gen <= 6)
enable_fbc = 0;
}
if (!enable_fbc) {
DRM_DEBUG_KMS("fbc disabled per module param\n");
dev_priv->no_fbc_reason = FBC_MODULE_PARAM;
goto out_disable;
}
if ((crtc->mode.flags & DRM_MODE_FLAG_INTERLACE) ||
(crtc->mode.flags & DRM_MODE_FLAG_DBLSCAN)) {
DRM_DEBUG_KMS("mode incompatible with compression, "
"disabling\n");
dev_priv->no_fbc_reason = FBC_UNSUPPORTED_MODE;
goto out_disable;
}
if ((crtc->mode.hdisplay > 2048) ||
(crtc->mode.vdisplay > 1536)) {
DRM_DEBUG_KMS("mode too large for compression, disabling\n");
dev_priv->no_fbc_reason = FBC_MODE_TOO_LARGE;
goto out_disable;
}
if ((IS_I915GM(dev) || IS_I945GM(dev)) && intel_crtc->plane != 0) {
DRM_DEBUG_KMS("plane not 0, disabling compression\n");
dev_priv->no_fbc_reason = FBC_BAD_PLANE;
goto out_disable;
}
/* The use of a CPU fence is mandatory in order to detect writes
* by the CPU to the scanout and trigger updates to the FBC.
*/
if (obj->tiling_mode != I915_TILING_X ||
obj->fence_reg == I915_FENCE_REG_NONE) {
DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n");
dev_priv->no_fbc_reason = FBC_NOT_TILED;
goto out_disable;
}
/* If the kernel debugger is active, always disable compression */
/* NOTE(review): unlike the other bail-outs, this path leaves
 * no_fbc_reason untouched and logs nothing — confirm that is intended.
 */
if (in_dbg_master())
goto out_disable;
/* A non-zero return here is treated as "cannot fit the compressed
 * buffer".
 */
if (i915_gem_stolen_setup_compression(dev, intel_fb->obj->base.size)) {
DRM_DEBUG_KMS("framebuffer too large, disabling compression\n");
dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
goto out_disable;
}
/* If the scanout has not changed, don't modify the FBC settings.
* Note that we make the fundamental assumption that the fb->obj
* cannot be unpinned (and have its GTT offset and fence revoked)
* without first being decoupled from the scanout and FBC disabled.
*/
if (dev_priv->cfb_plane == intel_crtc->plane &&
dev_priv->cfb_fb == fb->base.id &&
dev_priv->cfb_y == crtc->y)
return;
if (intel_fbc_enabled(dev)) {
/* We update FBC along two paths, after changing fb/crtc
* configuration (modeswitching) and after page-flipping
* finishes. For the latter, we know that not only did
* we disable the FBC at the start of the page-flip
* sequence, but also more than one vblank has passed.
*
* For the former case of modeswitching, it is possible
* to switch between two FBC valid configurations
* instantaneously so we do need to disable the FBC
* before we can modify its control registers. We also
* have to wait for the next vblank for that to take
* effect. However, since we delay enabling FBC we can
* assume that a vblank has passed since disabling and
* that we can safely alter the registers in the deferred
* callback.
*
* In the scenario that we go from a valid to invalid
* and then back to valid FBC configuration we have
* no strict enforcement that a vblank occurred since
* disabling the FBC. However, along all current pipe
* disabling paths we do need to wait for a vblank at
* some point. And we wait before enabling FBC anyway.
*/
DRM_DEBUG_KMS("disabling active FBC for update\n");
intel_disable_fbc(dev);
}
/* Schedule the (deferred) enable with an interval of 500 */
intel_enable_fbc(crtc, 500);
return;
out_disable:
/* Multiple disables should be harmless */
if (intel_fbc_enabled(dev)) {
DRM_DEBUG_KMS("unsupported config, disabling FBC\n");
intel_disable_fbc(dev);
}
/* Release the compression buffer on every disable path */
i915_gem_stolen_cleanup_compression(dev);
}
/*
 * Decode the FSB and memory frequencies from CLKCFG and the DDR3 flag from
 * CSHRDDR3CTL, storing them in dev_priv. Field values that match none of
 * the known encodings leave the corresponding dev_priv member untouched.
 */
static void i915_pineview_get_mem_freq(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 clkcfg = I915_READ(CLKCFG);
	u32 ddr3ctl;

	switch (clkcfg & CLKCFG_FSB_MASK) {
	case CLKCFG_FSB_400:
		dev_priv->fsb_freq = 400; /* 100*4 */
		break;
	case CLKCFG_FSB_533:
		dev_priv->fsb_freq = 533; /* 133*4 */
		break;
	case CLKCFG_FSB_667:
		dev_priv->fsb_freq = 667; /* 167*4 */
		break;
	case CLKCFG_FSB_800:
		dev_priv->fsb_freq = 800; /* 200*4 */
		break;
	}

	switch (clkcfg & CLKCFG_MEM_MASK) {
	case CLKCFG_MEM_533:
		dev_priv->mem_freq = 533;
		break;
	case CLKCFG_MEM_667:
		dev_priv->mem_freq = 667;
		break;
	case CLKCFG_MEM_800:
		dev_priv->mem_freq = 800;
		break;
	}

	/* Pineview reports DDR3 operation in a separate register */
	ddr3ctl = I915_READ(CSHRDDR3CTL);
	if (ddr3ctl & CSHRDDR3CTL_DDR3)
		dev_priv->is_ddr3 = 1;
	else
		dev_priv->is_ddr3 = 0;
}
/*
 * Decode the memory frequency from DDRMPLL1 and the FSB frequency from
 * CSIPLL0 into dev_priv, then derive the ips.r_t and ips.c_m values from
 * them. Unknown encodings are logged and stored as a frequency of 0.
 */
static void i915_ironlake_get_mem_freq(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u16 ddrpll = I915_READ16(DDRMPLL1);
	u16 csipll = I915_READ16(CSIPLL0);

	/* Low byte of DDRMPLL1 selects the memory frequency */
	switch (ddrpll & 0xff) {
	case 0xc:
		dev_priv->mem_freq = 800;
		break;
	case 0x10:
		dev_priv->mem_freq = 1066;
		break;
	case 0x14:
		dev_priv->mem_freq = 1333;
		break;
	case 0x18:
		dev_priv->mem_freq = 1600;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
				 ddrpll & 0xff);
		dev_priv->mem_freq = 0;
		break;
	}

	dev_priv->ips.r_t = dev_priv->mem_freq;

	/* Low ten bits of CSIPLL0 select the FSB frequency */
	switch (csipll & 0x3ff) {
	case 0x00c:
		dev_priv->fsb_freq = 3200;
		break;
	case 0x00e:
		dev_priv->fsb_freq = 3733;
		break;
	case 0x010:
		dev_priv->fsb_freq = 4266;
		break;
	case 0x012:
		dev_priv->fsb_freq = 4800;
		break;
	case 0x014:
		dev_priv->fsb_freq = 5333;
		break;
	case 0x016:
		dev_priv->fsb_freq = 5866;
		break;
	case 0x018:
		dev_priv->fsb_freq = 6400;
		break;
	default:
		DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
				 csipll & 0x3ff);
		dev_priv->fsb_freq = 0;
		break;
	}

	/* c_m: 0 at exactly 3200, 1 for (3200, 4800], 2 for anything else */
	if (dev_priv->fsb_freq == 3200)
		dev_priv->ips.c_m = 0;
	else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800)
		dev_priv->ips.c_m = 1;
	else
		dev_priv->ips.c_m = 2;
}
/*
 * CxSR latency lookup table, searched by intel_get_cxsr_latency().
 *
 * Columns, presumably in struct cxsr_latency field order (the struct is
 * declared elsewhere — TODO confirm): is_desktop, is_ddr3, fsb_freq,
 * mem_freq, display_sr, display_hpll_disable, cursor_sr,
 * cursor_hpll_disable.
 *
 * NOTE(review): in every row but one the last value equals the seventh
 * value plus 30000; the {0, 0, 667, 400, ...} row has 4103 / 34106.
 * Verify that row against the hardware documentation.
 */
static const struct cxsr_latency cxsr_latency_table[] = {
{1, 0, 800, 400, 3382, 33382, 3983, 33983}, /* DDR2-400 SC */
{1, 0, 800, 667, 3354, 33354, 3807, 33807}, /* DDR2-667 SC */
{1, 0, 800, 800, 3347, 33347, 3763, 33763}, /* DDR2-800 SC */
{1, 1, 800, 667, 6420, 36420, 6873, 36873}, /* DDR3-667 SC */
{1, 1, 800, 800, 5902, 35902, 6318, 36318}, /* DDR3-800 SC */
{1, 0, 667, 400, 3400, 33400, 4021, 34021}, /* DDR2-400 SC */
{1, 0, 667, 667, 3372, 33372, 3845, 33845}, /* DDR2-667 SC */
{1, 0, 667, 800, 3386, 33386, 3822, 33822}, /* DDR2-800 SC */
{1, 1, 667, 667, 6438, 36438, 6911, 36911}, /* DDR3-667 SC */
{1, 1, 667, 800, 5941, 35941, 6377, 36377}, /* DDR3-800 SC */
{1, 0, 400, 400, 3472, 33472, 4173, 34173}, /* DDR2-400 SC */
{1, 0, 400, 667, 3443, 33443, 3996, 33996}, /* DDR2-667 SC */
{1, 0, 400, 800, 3430, 33430, 3946, 33946}, /* DDR2-800 SC */
{1, 1, 400, 667, 6509, 36509, 7062, 37062}, /* DDR3-667 SC */
{1, 1, 400, 800, 5985, 35985, 6501, 36501}, /* DDR3-800 SC */
{0, 0, 800, 400, 3438, 33438, 4065, 34065}, /* DDR2-400 SC */
{0, 0, 800, 667, 3410, 33410, 3889, 33889}, /* DDR2-667 SC */
{0, 0, 800, 800, 3403, 33403, 3845, 33845}, /* DDR2-800 SC */
{0, 1, 800, 667, 6476, 36476, 6955, 36955}, /* DDR3-667 SC */
{0, 1, 800, 800, 5958, 35958, 6400, 36400}, /* DDR3-800 SC */
{0, 0, 667, 400, 3456, 33456, 4103, 34106}, /* DDR2-400 SC */
{0, 0, 667, 667, 3428, 33428, 3927, 33927}, /* DDR2-667 SC */
{0, 0, 667, 800, 3443, 33443, 3905, 33905}, /* DDR2-800 SC */
{0, 1, 667, 667, 6494, 36494, 6993, 36993}, /* DDR3-667 SC */
{0, 1, 667, 800, 5998, 35998, 6460, 36460}, /* DDR3-800 SC */
{0, 0, 400, 400, 3528, 33528, 4255, 34255}, /* DDR2-400 SC */
{0, 0, 400, 667, 3500, 33500, 4079, 34079}, /* DDR2-667 SC */
{0, 0, 400, 800, 3487, 33487, 4029, 34029}, /* DDR2-800 SC */
{0, 1, 400, 667, 6566, 36566, 7145, 37145}, /* DDR3-667 SC */
{0, 1, 400, 800, 6042, 36042, 6584, 36584}, /* DDR3-800 SC */
};
/*
 * Look up the CxSR latency entry matching the given platform description.
 *
 * Returns a pointer into cxsr_latency_table, or NULL when either frequency
 * is zero or no row matches all four keys.
 */
static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
							 int is_ddr3,
							 int fsb,
							 int mem)
{
	int idx;

	/* A zero frequency means it was never detected */
	if (fsb == 0 || mem == 0)
		return NULL;

	for (idx = 0; idx < ARRAY_SIZE(cxsr_latency_table); idx++) {
		const struct cxsr_latency *entry = &cxsr_latency_table[idx];

		if (is_desktop != entry->is_desktop)
			continue;
		if (is_ddr3 != entry->is_ddr3)
			continue;
		if (fsb != entry->fsb_freq)
			continue;
		if (mem != entry->mem_freq)
			continue;

		return entry;
	}

	DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

	return NULL;
}
static void pineview_disable_cxsr(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
/* deactivate cxsr */
I915_WRITE(DSPFW3, I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN);
}
/*
* Latency for FIFO fetches is dependent on several factors:
* - memory configuration (speed, channels)
* - chipset
* - current MCH state
* It can be fairly high in some situations, so here we assume a fairly
* pessimal value. It's a tradeoff between extra memory fetches (if we
* set this value too high, the FIFO will fetch frequently to stay full)
* and power consumption (set it too low to save power and we might see
* FIFO underruns and display "flicker").
*
* A value of 5us seems to be a good balance; safe for very low end
* platforms but not overly aggressive on lower latency configs.
*
* The value is expressed in nanoseconds: 5000 ns == 5 us.
*/
static const int latency_ns = 5000;
/*
 * Read the FIFO allocation for @plane out of DSPARB.
 *
 * Plane A (plane == 0) gets the low 7-bit field; any other plane gets the
 * 7-bit field at DSPARB_CSTART_SHIFT minus plane A's value.
 */
static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dsparb = I915_READ(DSPARB);
	int a_size = dsparb & 0x7f;
	int size;

	if (plane) {
		int c_start = (dsparb >> DSPARB_CSTART_SHIFT) & 0x7f;

		size = c_start - a_size;
	} else {
		size = a_size;
	}

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A", size);

	return size;
}
/*
 * Read the FIFO allocation for @plane out of DSPARB.
 *
 * Plane A (plane == 0) gets the low 9-bit field; any other plane gets the
 * 9-bit field at DSPARB_BEND_SHIFT minus plane A's value. The result is
 * halved to express it in cachelines.
 */
static int i85x_get_fifo_size(struct drm_device *dev, int plane)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dsparb = I915_READ(DSPARB);
	int a_size = dsparb & 0x1ff;
	int size;

	if (plane) {
		int b_end = (dsparb >> DSPARB_BEND_SHIFT) & 0x1ff;

		size = b_end - a_size;
	} else {
		size = a_size;
	}

	/* Convert to cachelines */
	size >>= 1;

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A", size);

	return size;
}
/*
 * Read the FIFO size from the low 7 bits of DSPARB and divide by four to
 * express it in cachelines. @plane only affects the debug message.
 */
static int i845_get_fifo_size(struct drm_device *dev, int plane)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dsparb = I915_READ(DSPARB);
	int size = (dsparb & 0x7f) >> 2; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A",
		      size);

	return size;
}
/*
 * Read the FIFO size from the low 7 bits of DSPARB and halve it to express
 * it in cachelines. @plane only affects the debug message.
 */
static int i830_get_fifo_size(struct drm_device *dev, int plane)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t dsparb = I915_READ(DSPARB);
	int size = (dsparb & 0x7f) >> 1; /* Convert to cachelines */

	DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
		      plane ? "B" : "A", size);

	return size;
}
/*
 * Per-platform watermark parameter tables.
 *
 * Each table is a positional initializer of struct intel_watermark_params.
 * NOTE(review): the struct is declared elsewhere; judging by the macro
 * names and the members used in this file, the five values are presumably
 * fifo_size, max_wm, default_wm, guard_size and cacheline_size, in that
 * order — confirm against the struct declaration before reordering.
 */
/* Pineview has different values for various configs */
static const struct intel_watermark_params pineview_display_wm = {
PINEVIEW_DISPLAY_FIFO,
PINEVIEW_MAX_WM,
PINEVIEW_DFT_WM,
PINEVIEW_GUARD_WM,
PINEVIEW_FIFO_LINE_SIZE
};
/* Pineview display plane, HPLL-off variant (differs in the third value) */
static const struct intel_watermark_params pineview_display_hplloff_wm = {
PINEVIEW_DISPLAY_FIFO,
PINEVIEW_MAX_WM,
PINEVIEW_DFT_HPLLOFF_WM,
PINEVIEW_GUARD_WM,
PINEVIEW_FIFO_LINE_SIZE
};
/* Pineview cursor */
static const struct intel_watermark_params pineview_cursor_wm = {
PINEVIEW_CURSOR_FIFO,
PINEVIEW_CURSOR_MAX_WM,
PINEVIEW_CURSOR_DFT_WM,
PINEVIEW_CURSOR_GUARD_WM,
PINEVIEW_FIFO_LINE_SIZE,
};
/* Pineview cursor, HPLL-off variant (same values as pineview_cursor_wm) */
static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
PINEVIEW_CURSOR_FIFO,
PINEVIEW_CURSOR_MAX_WM,
PINEVIEW_CURSOR_DFT_WM,
PINEVIEW_CURSOR_GUARD_WM,
PINEVIEW_FIFO_LINE_SIZE
};
/* G4X display plane */
static const struct intel_watermark_params g4x_wm_info = {
G4X_FIFO_SIZE,
G4X_MAX_WM,
G4X_MAX_WM,
2,
G4X_FIFO_LINE_SIZE,
};
/* G4X cursor (uses the I965 cursor FIFO constants) */
static const struct intel_watermark_params g4x_cursor_wm_info = {
I965_CURSOR_FIFO,
I965_CURSOR_MAX_WM,
I965_CURSOR_DFT_WM,
2,
G4X_FIFO_LINE_SIZE,
};
/* Valleyview display plane */
static const struct intel_watermark_params valleyview_wm_info = {
VALLEYVIEW_FIFO_SIZE,
VALLEYVIEW_MAX_WM,
VALLEYVIEW_MAX_WM,
2,
G4X_FIFO_LINE_SIZE,
};
/* Valleyview cursor */
static const struct intel_watermark_params valleyview_cursor_wm_info = {
I965_CURSOR_FIFO,
VALLEYVIEW_CURSOR_MAX_WM,
I965_CURSOR_DFT_WM,
2,
G4X_FIFO_LINE_SIZE,
};
/* I965 cursor */
static const struct intel_watermark_params i965_cursor_wm_info = {
I965_CURSOR_FIFO,
I965_CURSOR_MAX_WM,
I965_CURSOR_DFT_WM,
2,
I915_FIFO_LINE_SIZE,
};
/* I945 */
static const struct intel_watermark_params i945_wm_info = {
I945_FIFO_SIZE,
I915_MAX_WM,
1,
2,
I915_FIFO_LINE_SIZE
};
/* I915 */
static const struct intel_watermark_params i915_wm_info = {
I915_FIFO_SIZE,
I915_MAX_WM,
1,
2,
I915_FIFO_LINE_SIZE
};
/* I855 */
static const struct intel_watermark_params i855_wm_info = {
I855GM_FIFO_SIZE,
I915_MAX_WM,
1,
2,
I830_FIFO_LINE_SIZE
};
/* I830 */
static const struct intel_watermark_params i830_wm_info = {
I830_FIFO_SIZE,
I915_MAX_WM,
1,
2,
I830_FIFO_LINE_SIZE
};
/* Ironlake display plane */
static const struct intel_watermark_params ironlake_display_wm_info = {
ILK_DISPLAY_FIFO,
ILK_DISPLAY_MAXWM,
ILK_DISPLAY_DFTWM,
2,
ILK_FIFO_LINE_SIZE
};
/* Ironlake cursor */
static const struct intel_watermark_params ironlake_cursor_wm_info = {
ILK_CURSOR_FIFO,
ILK_CURSOR_MAXWM,
ILK_CURSOR_DFTWM,
2,
ILK_FIFO_LINE_SIZE
};
/* Ironlake display plane, self-refresh */
static const struct intel_watermark_params ironlake_display_srwm_info = {
ILK_DISPLAY_SR_FIFO,
ILK_DISPLAY_MAX_SRWM,
ILK_DISPLAY_DFT_SRWM,
2,
ILK_FIFO_LINE_SIZE
};
/* Ironlake cursor, self-refresh */
static const struct intel_watermark_params ironlake_cursor_srwm_info = {
ILK_CURSOR_SR_FIFO,
ILK_CURSOR_MAX_SRWM,
ILK_CURSOR_DFT_SRWM,
2,
ILK_FIFO_LINE_SIZE
};
/* Sandybridge display plane */
static const struct intel_watermark_params sandybridge_display_wm_info = {
SNB_DISPLAY_FIFO,
SNB_DISPLAY_MAXWM,
SNB_DISPLAY_DFTWM,
2,
SNB_FIFO_LINE_SIZE
};
/* Sandybridge cursor */
static const struct intel_watermark_params sandybridge_cursor_wm_info = {
SNB_CURSOR_FIFO,
SNB_CURSOR_MAXWM,
SNB_CURSOR_DFTWM,
2,
SNB_FIFO_LINE_SIZE
};
/* Sandybridge display plane, self-refresh */
static const struct intel_watermark_params sandybridge_display_srwm_info = {
SNB_DISPLAY_SR_FIFO,
SNB_DISPLAY_MAX_SRWM,
SNB_DISPLAY_DFT_SRWM,
2,
SNB_FIFO_LINE_SIZE
};
/* Sandybridge cursor, self-refresh */
static const struct intel_watermark_params sandybridge_cursor_srwm_info = {
SNB_CURSOR_SR_FIFO,
SNB_CURSOR_MAX_SRWM,
SNB_CURSOR_DFT_SRWM,
2,
SNB_FIFO_LINE_SIZE
};
/**
 * intel_calculate_wm - calculate watermark level
 * @clock_in_khz: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the FIFO the watermark is computed against
 * @pixel_size: display pixel size
 * @latency_ns: memory latency for the platform
 *
 * Work out the watermark level, i.e. the fill level at which the display
 * plane resumes fetching from memory. FIFO size and allocation differ per
 * chip, so the caller is responsible for determining them and supplying
 * the matching intel_watermark_params structure.
 *
 * While the pixel clock runs, the FIFO drains at a rate set by the pixel
 * size. Once the watermark is reached, FIFO-line-sized chunks are fetched
 * until the fill level is back above the watermark. A FIFO that drains
 * completely underruns, which can hang the display engine.
 *
 * Returns the computed level capped at @wm->max_wm, or @wm->default_wm
 * when the result is not positive.
 */
static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
					const struct intel_watermark_params *wm,
					int fifo_size,
					int pixel_size,
					unsigned long latency_ns)
{
	long entries, level;

	/*
	 * The clock is divided down before multiplying so that the product
	 * cannot overflow for the expected ranges: clocks run from a few
	 * thousand to several hundred thousand, latencies are usually a few
	 * thousand.
	 */
	entries = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
		1000;
	entries = DIV_ROUND_UP(entries, wm->cacheline_size);

	DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries);

	level = fifo_size - (entries + wm->guard_size);

	DRM_DEBUG_KMS("FIFO watermark level: %ld\n", level);

	/* Compare as signed so a negative level is not seen as huge */
	if (level > (long)wm->max_wm)
		level = wm->max_wm;

	/* Nothing left after the requirement: use the default */
	if (level <= 0)
		level = wm->default_wm;

	return level;
}
/*
 * Return the one active CRTC on @dev, or NULL when there is none or more
 * than one.
 */
static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
{
	struct drm_crtc *candidate;
	struct drm_crtc *only = NULL;

	list_for_each_entry(candidate, &dev->mode_config.crtc_list, head) {
		if (!intel_crtc_active(candidate))
			continue;

		/* A second active CRTC means there is no single one */
		if (only)
			return NULL;

		only = candidate;
	}

	return only;
}
/*
 * Recompute and program the Pineview self-refresh watermarks.
 *
 * If no latency entry matches the detected FSB/memory configuration, or if
 * there is not exactly one active CRTC, CxSR is disabled. Otherwise four
 * watermarks (display SR, cursor SR, display HPLL-off SR, cursor HPLL-off
 * SR) are computed with intel_calculate_wm() and written into DSPFW1 and
 * DSPFW3 with read-modify-write sequences, after which self-refresh is
 * enabled.
 */
static void pineview_update_wm(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_crtc *crtc;
const struct cxsr_latency *latency;
u32 reg;
unsigned long wm;
latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
dev_priv->fsb_freq, dev_priv->mem_freq);
if (!latency) {
DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
pineview_disable_cxsr(dev);
return;
}
crtc = single_enabled_crtc(dev);
if (crtc) {
int clock = crtc->mode.clock;
int pixel_size = crtc->fb->bits_per_pixel / 8;
/* Display SR */
wm = intel_calculate_wm(clock, &pineview_display_wm,
pineview_display_wm.fifo_size,
pixel_size, latency->display_sr);
reg = I915_READ(DSPFW1);
reg &= ~DSPFW_SR_MASK;
reg |= wm << DSPFW_SR_SHIFT;
I915_WRITE(DSPFW1, reg);
DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);
/* cursor SR */
/* NOTE(review): the cursor params are combined with the display
 * FIFO size here — confirm this is intended.
 */
wm = intel_calculate_wm(clock, &pineview_cursor_wm,
pineview_display_wm.fifo_size,
pixel_size, latency->cursor_sr);
reg = I915_READ(DSPFW3);
reg &= ~DSPFW_CURSOR_SR_MASK;
reg |= (wm & 0x3f) << DSPFW_CURSOR_SR_SHIFT;
I915_WRITE(DSPFW3, reg);
/* Display HPLL off SR */
wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
pineview_display_hplloff_wm.fifo_size,
pixel_size, latency->display_hpll_disable);
reg = I915_READ(DSPFW3);
reg &= ~DSPFW_HPLL_SR_MASK;
reg |= wm & DSPFW_HPLL_SR_MASK;
I915_WRITE(DSPFW3, reg);
/* cursor HPLL off SR */
/* NOTE(review): as above, the display HPLL-off FIFO size is passed
 * together with the cursor params — confirm.
 */
wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
pineview_display_hplloff_wm.fifo_size,
pixel_size, latency->cursor_hpll_disable);
reg = I915_READ(DSPFW3);
reg &= ~DSPFW_HPLL_CURSOR_MASK;
reg |= (wm & 0x3f) << DSPFW_HPLL_CURSOR_SHIFT;
I915_WRITE(DSPFW3, reg);
DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);
/* activate cxsr */
I915_WRITE(DSPFW3,
I915_READ(DSPFW3) | PINEVIEW_SELF_REFRESH_EN);
DRM_DEBUG_KMS("Self-refresh is enabled\n");
} else {
/* Zero or several active CRTCs: self-refresh cannot be used */
pineview_disable_cxsr(dev);
DRM_DEBUG_KMS("Self-refresh is disabled\n");
}
}
/*
 * Compute the level-0 plane and cursor watermarks for @plane.
 *
 * @dev:                the drm_device
 * @plane:              plane index used to look up the CRTC
 * @display:            FIFO parameters for the display plane
 * @display_latency_ns: latency used for the plane watermark
 * @cursor:             FIFO parameters for the cursor
 * @cursor_latency_ns:  latency used for the cursor watermark
 * @plane_wm:           out: plane watermark
 * @cursor_wm:          out: cursor watermark
 *
 * Returns false, with both outputs set to the respective guard sizes, when
 * the CRTC for @plane is not active; returns true otherwise. Both results
 * are capped at the corresponding max_wm.
 */
static bool g4x_compute_wm0(struct drm_device *dev,
			    int plane,
			    const struct intel_watermark_params *display,
			    int display_latency_ns,
			    const struct intel_watermark_params *cursor,
			    int cursor_latency_ns,
			    int *plane_wm,
			    int *cursor_wm)
{
	struct drm_crtc *crtc;
	int htotal, hdisplay, clock, pixel_size;
	int line_time_us, line_count;
	int entries, tlb_miss;

	crtc = intel_get_crtc_for_plane(dev, plane);
	if (!intel_crtc_active(crtc)) {
		*cursor_wm = cursor->guard_size;
		*plane_wm = display->guard_size;
		return false;
	}

	htotal = crtc->mode.htotal;
	hdisplay = crtc->mode.hdisplay;
	clock = crtc->mode.clock;
	pixel_size = crtc->fb->bits_per_pixel / 8;

	/* Use the small buffer method to calculate plane watermark */
	entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
	tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
	if (tlb_miss > 0)
		entries += tlb_miss;
	entries = DIV_ROUND_UP(entries, display->cacheline_size);
	*plane_wm = entries + display->guard_size;
	if (*plane_wm > (int)display->max_wm)
		*plane_wm = display->max_wm;

	/* Use the large buffer method to calculate cursor watermark */
	line_time_us = ((htotal * 1000) / clock);
	/* A pixel clock larger than htotal * 1000 truncates the line time
	 * to zero; clamp it so the division below cannot fault.
	 */
	if (line_time_us < 1)
		line_time_us = 1;
	line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
	entries = line_count * 64 * pixel_size;
	tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
	if (tlb_miss > 0)
		entries += tlb_miss;
	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
	*cursor_wm = entries + cursor->guard_size;
	if (*cursor_wm > (int)cursor->max_wm)
		*cursor_wm = (int)cursor->max_wm;

	return true;
}
/*
 * Validate a pair of self-refresh watermarks.
 *
 * A watermark that exceeds the largest value its register field can hold
 * cannot be programmed, so self-refresh has to be disabled in that case.
 * The same applies when both watermarks are zero.
 *
 * Returns true when the values are usable, false otherwise.
 */
static bool g4x_check_srwm(struct drm_device *dev,
			   int display_wm, int cursor_wm,
			   const struct intel_watermark_params *display,
			   const struct intel_watermark_params *cursor)
{
	bool usable = false;

	DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
		      display_wm, cursor_wm);

	if (display_wm > display->max_wm) {
		DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
			      display_wm, display->max_wm);
	} else if (cursor_wm > cursor->max_wm) {
		DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
			      cursor_wm, cursor->max_wm);
	} else if (display_wm == 0 && cursor_wm == 0) {
		DRM_DEBUG_KMS("SR latency is 0, disabling\n");
	} else {
		usable = true;
	}

	return usable;
}
/*
 * Compute the self-refresh display and cursor watermarks for the crtc
 * that drives @plane, given a self-refresh latency of @latency_ns.
 *
 * Returns false (with both outputs zeroed) when @latency_ns is 0;
 * otherwise returns the verdict of g4x_check_srwm() on the computed
 * values.  No crtc activity check is made here: crtc->fb is dereferenced
 * unconditionally, so callers must only pass a plane with an active crtc.
 */
static bool g4x_compute_srwm(struct drm_device *dev,
			     int plane,
			     int latency_ns,
			     const struct intel_watermark_params *display,
			     const struct intel_watermark_params *cursor,
			     int *display_wm, int *cursor_wm)
{
	struct drm_crtc *crtc;
	int hdisplay, htotal, pixel_size, clock;
	unsigned long line_time_us;
	int line_count, line_size;
	int small, large;
	int entries;

	if (!latency_ns) {
		*display_wm = *cursor_wm = 0;
		return false;
	}

	crtc = intel_get_crtc_for_plane(dev, plane);
	hdisplay = crtc->mode.hdisplay;
	htotal = crtc->mode.htotal;
	clock = crtc->mode.clock;
	pixel_size = crtc->fb->bits_per_pixel / 8;

	/*
	 * The integer division truncates to 0 once clock exceeds
	 * htotal * 1000; keep the divisor used below at 1 or more.
	 */
	line_time_us = max(htotal * 1000 / clock, 1);
	line_count = (latency_ns / line_time_us + 1000) / 1000;
	line_size = hdisplay * pixel_size;

	/* Use the minimum of the small and large buffer method for primary */
	small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
	large = line_count * line_size;

	entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
	*display_wm = entries + display->guard_size;

	/* calculate the self-refresh watermark for display cursor */
	entries = line_count * pixel_size * 64;
	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
	*cursor_wm = entries + cursor->guard_size;

	return g4x_check_srwm(dev,
			      *display_wm, *cursor_wm,
			      display, cursor);
}
/*
 * Compute the drain latency precision multiplier and drain latency value
 * for the primary plane and the cursor of the crtc driving @plane.
 *
 * Returns false, leaving the outputs untouched, when the crtc is not
 * active; returns true once all four outputs have been written.
 */
static bool vlv_compute_drain_latency(struct drm_device *dev,
				      int plane,
				      int *plane_prec_mult,
				      int *plane_dl,
				      int *cursor_prec_mult,
				      int *cursor_dl)
{
	struct drm_crtc *crtc = intel_get_crtc_for_plane(dev, plane);
	int scaled_clock, bytes_per_pixel;
	int plane_entries, cursor_entries;

	if (!intel_crtc_active(crtc))
		return false;

	scaled_clock = crtc->mode.clock / 1000;	/* VESA DOT Clock / 1000 */
	bytes_per_pixel = crtc->fb->bits_per_pixel / 8;	/* BPP */

	/* Primary plane */
	plane_entries = scaled_clock * bytes_per_pixel;
	if (plane_entries > 256)
		*plane_prec_mult = DRAIN_LATENCY_PRECISION_32;
	else
		*plane_prec_mult = DRAIN_LATENCY_PRECISION_16;
	*plane_dl = (64 * (*plane_prec_mult) * 4) / plane_entries;

	/* Cursor: BPP is always 4 for cursor */
	cursor_entries = scaled_clock * 4;
	if (cursor_entries > 256)
		*cursor_prec_mult = DRAIN_LATENCY_PRECISION_32;
	else
		*cursor_prec_mult = DRAIN_LATENCY_PRECISION_16;
	*cursor_dl = (64 * (*cursor_prec_mult) * 4) / cursor_entries;

	return true;
}
/*
 * Update drain latency registers of memory arbiter
 *
 * Valleyview SoC has a new memory arbiter and needs drain latency registers
 * to be programmed. Each plane has a drain latency multiplier and a drain
 * latency value.
 *
 * VLV_DDL1 is written for plane A / cursor A and VLV_DDL2 for plane B /
 * cursor B; a register is left untouched when the matching crtc is not
 * active.
 */
static void vlv_update_drain_latency(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	/* Precision multiplier is either 16 or 32 */
	int plane_mult, cursor_mult;
	int plane_dl, cursor_dl;
	int plane_prec, cursor_prec;

	/* For plane A, Cursor A */
	if (vlv_compute_drain_latency(dev, 0, &plane_mult, &plane_dl,
				      &cursor_mult, &cursor_dl)) {
		if (cursor_mult == DRAIN_LATENCY_PRECISION_32)
			cursor_prec = DDL_CURSORA_PRECISION_32;
		else
			cursor_prec = DDL_CURSORA_PRECISION_16;

		if (plane_mult == DRAIN_LATENCY_PRECISION_32)
			plane_prec = DDL_PLANEA_PRECISION_32;
		else
			plane_prec = DDL_PLANEA_PRECISION_16;

		I915_WRITE(VLV_DDL1, cursor_prec |
			   (cursor_dl << DDL_CURSORA_SHIFT) |
			   plane_prec | plane_dl);
	}

	/* For plane B, Cursor B */
	if (vlv_compute_drain_latency(dev, 1, &plane_mult, &plane_dl,
				      &cursor_mult, &cursor_dl)) {
		if (cursor_mult == DRAIN_LATENCY_PRECISION_32)
			cursor_prec = DDL_CURSORB_PRECISION_32;
		else
			cursor_prec = DDL_CURSORB_PRECISION_16;

		if (plane_mult == DRAIN_LATENCY_PRECISION_32)
			plane_prec = DDL_PLANEB_PRECISION_32;
		else
			plane_prec = DDL_PLANEB_PRECISION_16;

		I915_WRITE(VLV_DDL2, cursor_prec |
			   (cursor_dl << DDL_CURSORB_SHIFT) |
			   plane_prec | plane_dl);
	}
}
/*
 * The update_wm functions below build @mask by OR-ing in one value per
 * active pipe, then use this test to decide whether the self-refresh
 * watermarks may be programmed (only when a single plane is in use).
 */
#define single_plane_enabled(mask) is_power_of_2(mask)
static void valleyview_update_wm(struct drm_device *dev)
{
static const int sr_latency_ns = 12000;
struct drm_i915_private *dev_priv = dev->dev_private;
int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
int plane_sr, cursor_sr;
int ignore_plane_sr, ignore_cursor_sr;
unsigned int enabled = 0;
vlv_update_drain_latency(dev);
if (g4x_compute_wm0(dev, 0,
&valleyview_wm_info, latency_ns,
&valleyview_cursor_wm_info, latency_ns,
&planea_wm, &cursora_wm))
enabled |= 1;
if (g4x_compute_wm0(dev, 1,
&valleyview_wm_info, latency_ns,
&valleyview_cursor_wm_info, latency_ns,
&planeb_wm, &cursorb_wm))
enabled |= 2;
if (single_plane_enabled(enabled) &&
g4x_compute_srwm(dev, ffs(enabled) - 1,
sr_latency_ns,
&valleyview_wm_info,
&valleyview_cursor_wm_info,
&plane_sr, &ignore_cursor_sr) &&
g4x_compute_srwm(dev, ffs(enabled) - 1,
2*sr_latency_ns,
&valleyview_wm_info,
&valleyview_cursor_wm_info,
&ignore_plane_sr, &cursor_sr)) {
I915_WRITE(FW_BLC_SELF_VLV, FW_CSPWRDWNEN);
} else {
I915_WRITE(FW_BLC_SELF_VLV,
I915_READ(FW_BLC_SELF_VLV) & ~FW_CSPWRDWNEN);
plane_sr = cursor_sr = 0;
}
DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
planea_wm, cursora_wm,
planeb_wm, cursorb_wm,
plane_sr, cursor_sr);
I915_WRITE(DSPFW1,
(plane_sr << DSPFW_SR_SHIFT) |
(cursorb_wm << DSPFW_CURSORB_SHIFT) |
(planeb_wm << DSPFW_PLANEB_SHIFT) |
planea_wm);
I915_WRITE(DSPFW2,
(I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
(cursora_wm << DSPFW_CURSORA_SHIFT));
I915_WRITE(DSPFW3,
(I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
(cursor_sr << DSPFW_CURSOR_SR_SHIFT));
}
static void g4x_update_wm(struct drm_device *dev)
{
static const int sr_latency_ns = 12000;
struct drm_i915_private *dev_priv = dev->dev_private;
int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
int plane_sr, cursor_sr;
unsigned int enabled = 0;
if (g4x_compute_wm0(dev, 0,
&g4x_wm_info, latency_ns,
&g4x_cursor_wm_info, latency_ns,
&planea_wm, &cursora_wm))
enabled |= 1;
if (g4x_compute_wm0(dev, 1,
&g4x_wm_info, latency_ns,
&g4x_cursor_wm_info, latency_ns,
&planeb_wm, &cursorb_wm))
enabled |= 2;
if (single_plane_enabled(enabled) &&
g4x_compute_srwm(dev, ffs(enabled) - 1,
sr_latency_ns,
&g4x_wm_info,
&g4x_cursor_wm_info,
&plane_sr, &cursor_sr)) {
I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN);
} else {
I915_WRITE(FW_BLC_SELF,
I915_READ(FW_BLC_SELF) & ~FW_BLC_SELF_EN);
plane_sr = cursor_sr = 0;
}
DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
planea_wm, cursora_wm,
planeb_wm, cursorb_wm,
plane_sr, cursor_sr);
I915_WRITE(DSPFW1,
(plane_sr << DSPFW_SR_SHIFT) |
(cursorb_wm << DSPFW_CURSORB_SHIFT) |
(planeb_wm << DSPFW_PLANEB_SHIFT) |
planea_wm);
I915_WRITE(DSPFW2,
(I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
(cursora_wm << DSPFW_CURSORA_SHIFT));
/* HPLL off in SR has some issues on G4x... disable it */
I915_WRITE(DSPFW3,
(I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
(cursor_sr << DSPFW_CURSOR_SR_SHIFT));
}
/*
 * Program the i965 FIFO watermarks.
 *
 * The non-SR watermarks are fixed at 8.  The self-refresh display and
 * cursor watermarks are computed only when single_enabled_crtc() returns
 * a crtc; otherwise the defaults (srwm = 1, cursor_sr = 16) are written.
 * On Crestline the FW_BLC_SELF enable bit is set or cleared to match.
 */
static void i965_update_wm(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_crtc *crtc;
	int srwm = 1;
	int cursor_sr = 16;

	/* Calc sr entries for one plane configs */
	crtc = single_enabled_crtc(dev);
	if (crtc) {
		/* self-refresh has much higher latency */
		static const int sr_latency_ns = 12000;
		int clock = crtc->mode.clock;
		int htotal = crtc->mode.htotal;
		int hdisplay = crtc->mode.hdisplay;
		int pixel_size = crtc->fb->bits_per_pixel / 8;
		unsigned long line_time_us;
		int entries;

		/*
		 * The integer division truncates to 0 once clock exceeds
		 * htotal * 1000; keep the divisor used below at 1 or more.
		 */
		line_time_us = max(htotal * 1000 / clock, 1);

		/* Use ns/us then divide to preserve precision */
		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
			pixel_size * hdisplay;
		entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
		srwm = I965_FIFO_SIZE - entries;
		if (srwm < 0)
			srwm = 1;
		srwm &= 0x1ff;
		DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
			      entries, srwm);

		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
			pixel_size * 64;
		entries = DIV_ROUND_UP(entries,
					  i965_cursor_wm_info.cacheline_size);
		cursor_sr = i965_cursor_wm_info.fifo_size -
			(entries + i965_cursor_wm_info.guard_size);

		if (cursor_sr > i965_cursor_wm_info.max_wm)
			cursor_sr = i965_cursor_wm_info.max_wm;

		DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
			      "cursor %d\n", srwm, cursor_sr);

		if (IS_CRESTLINE(dev))
			I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN);
	} else {
		/* Turn off self refresh if both pipes are enabled */
		if (IS_CRESTLINE(dev))
			I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF)
				   & ~FW_BLC_SELF_EN);
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
		      srwm);

	/* 965 has limitations... */
	I915_WRITE(DSPFW1, (srwm << DSPFW_SR_SHIFT) |
		   (8 << 16) | (8 << 8) | (8 << 0));
	I915_WRITE(DSPFW2, (8 << 8) | (8 << 0));
	/* update cursor SR watermark */
	I915_WRITE(DSPFW3, (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
}
/*
 * Program the FW_BLC/FW_BLC2 FIFO watermarks for planes A and B and,
 * where HAS_FW_BLC() holds, the self-refresh watermark.
 *
 * Self-refresh is switched off before the watermarks are changed and is
 * only re-enabled afterwards when exactly one of the two planes has an
 * active crtc ("enabled" is non-NULL).
 */
static void i9xx_update_wm(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	const struct intel_watermark_params *wm_info;
	uint32_t fwater_lo;
	uint32_t fwater_hi;
	int cwm, srwm = 1;
	int fifo_size;
	int planea_wm, planeb_wm;
	struct drm_crtc *crtc, *enabled = NULL;

	if (IS_I945GM(dev))
		wm_info = &i945_wm_info;
	else if (!IS_GEN2(dev))
		wm_info = &i915_wm_info;
	else
		wm_info = &i855_wm_info;

	fifo_size = dev_priv->display.get_fifo_size(dev, 0);
	crtc = intel_get_crtc_for_plane(dev, 0);
	if (intel_crtc_active(crtc)) {
		int cpp = crtc->fb->bits_per_pixel / 8;
		if (IS_GEN2(dev))
			cpp = 4;

		planea_wm = intel_calculate_wm(crtc->mode.clock,
					       wm_info, fifo_size, cpp,
					       latency_ns);
		enabled = crtc;
	} else
		planea_wm = fifo_size - wm_info->guard_size;

	fifo_size = dev_priv->display.get_fifo_size(dev, 1);
	crtc = intel_get_crtc_for_plane(dev, 1);
	if (intel_crtc_active(crtc)) {
		int cpp = crtc->fb->bits_per_pixel / 8;
		if (IS_GEN2(dev))
			cpp = 4;

		planeb_wm = intel_calculate_wm(crtc->mode.clock,
					       wm_info, fifo_size, cpp,
					       latency_ns);
		/* "enabled" ends up non-NULL only if exactly one plane is on */
		if (enabled == NULL)
			enabled = crtc;
		else
			enabled = NULL;
	} else
		planeb_wm = fifo_size - wm_info->guard_size;

	DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);

	/*
	 * Overlay gets an aggressive default since video jitter is bad.
	 */
	cwm = 2;

	/* Play safe and disable self-refresh before adjusting watermarks. */
	if (IS_I945G(dev) || IS_I945GM(dev))
		I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN_MASK | 0);
	else if (IS_I915GM(dev))
		I915_WRITE(INSTPM, I915_READ(INSTPM) & ~INSTPM_SELF_EN);

	/* Calc sr entries for one plane configs */
	if (HAS_FW_BLC(dev) && enabled) {
		/* self-refresh has much higher latency */
		static const int sr_latency_ns = 6000;
		int clock = enabled->mode.clock;
		int htotal = enabled->mode.htotal;
		int hdisplay = enabled->mode.hdisplay;
		int pixel_size = enabled->fb->bits_per_pixel / 8;
		unsigned long line_time_us;
		int entries;

		/*
		 * The integer division truncates to 0 once clock exceeds
		 * htotal * 1000; keep the divisor used below at 1 or more.
		 */
		line_time_us = max(htotal * 1000 / clock, 1);

		/* Use ns/us then divide to preserve precision */
		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
			pixel_size * hdisplay;
		entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
		DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
		srwm = wm_info->fifo_size - entries;
		if (srwm < 0)
			srwm = 1;

		if (IS_I945G(dev) || IS_I945GM(dev))
			I915_WRITE(FW_BLC_SELF,
				   FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
		else if (IS_I915GM(dev))
			I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
	}

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
		      planea_wm, planeb_wm, cwm, srwm);

	fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
	fwater_hi = (cwm & 0x1f);

	/* Set request length to 8 cachelines per fetch */
	fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
	fwater_hi = fwater_hi | (1 << 8);

	I915_WRITE(FW_BLC, fwater_lo);
	I915_WRITE(FW_BLC2, fwater_hi);

	if (HAS_FW_BLC(dev)) {
		if (enabled) {
			if (IS_I945G(dev) || IS_I945GM(dev))
				I915_WRITE(FW_BLC_SELF,
					   FW_BLC_SELF_EN_MASK | FW_BLC_SELF_EN);
			else if (IS_I915GM(dev))
				I915_WRITE(INSTPM, I915_READ(INSTPM) | INSTPM_SELF_EN);
			DRM_DEBUG_KMS("memory self refresh enabled\n");
		} else
			DRM_DEBUG_KMS("memory self refresh disabled\n");
	}
}
/*
 * Program the plane A FIFO watermark in FW_BLC.
 *
 * Nothing is written unless single_enabled_crtc() returns a crtc.  The
 * low 12 bits of FW_BLC are replaced; the remaining bits are preserved.
 */
static void i830_update_wm(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_crtc *crtc = single_enabled_crtc(dev);
	uint32_t fwater_lo;
	int planea_wm;

	if (!crtc)
		return;

	planea_wm = intel_calculate_wm(crtc->mode.clock, &i830_wm_info,
				       dev_priv->display.get_fifo_size(dev, 0),
				       4, latency_ns);

	fwater_lo = I915_READ(FW_BLC);
	fwater_lo &= ~0xfff;
	fwater_lo |= (3<<8) | planea_wm;

	DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);

	I915_WRITE(FW_BLC, fwater_lo);
}
/*
 * Latencies that ironlake_update_wm() passes to g4x_compute_wm0() as the
 * display_latency_ns and cursor_latency_ns arguments respectively.
 */
#define ILK_LP0_PLANE_LATENCY 700
#define ILK_LP0_CURSOR_LATENCY 1300
/*
 * Check the wm result.
 *
 * If any calculated watermark value is larger than the maximum value that
 * can be programmed into the associated watermark register, that watermark
 * must be disabled.
 *
 * Returns false when @fbc_wm exceeds SNB_FBC_MAX_SRWM (in which case
 * DISP_FBC_WM_DIS is also set in DISP_ARB_CTL), when @display_wm or
 * @cursor_wm exceeds the max_wm of its parameter set, or when all three
 * watermarks are zero.  Returns true otherwise.
 */
static bool ironlake_check_srwm(struct drm_device *dev, int level,
				int fbc_wm, int display_wm, int cursor_wm,
				const struct intel_watermark_params *display,
				const struct intel_watermark_params *cursor)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	DRM_DEBUG_KMS("watermark %d: display plane %d, fbc lines %d,"
		      " cursor %d\n", level, display_wm, fbc_wm, cursor_wm);

	if (fbc_wm > SNB_FBC_MAX_SRWM) {
		DRM_DEBUG_KMS("fbc watermark(%d) is too large(%d), disabling wm%d+\n",
			      fbc_wm, SNB_FBC_MAX_SRWM, level);

		/* fbc has its own way to disable FBC WM */
		I915_WRITE(DISP_ARB_CTL,
			   I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS);
		return false;
	}

	/*
	 * Report the limit that was actually compared against: the caller's
	 * parameter set, not the SNB constants.
	 */
	if (display_wm > display->max_wm) {
		DRM_DEBUG_KMS("display watermark(%d) is too large(%ld), disabling wm%d+\n",
			      display_wm, display->max_wm, level);
		return false;
	}

	if (cursor_wm > cursor->max_wm) {
		DRM_DEBUG_KMS("cursor watermark(%d) is too large(%ld), disabling wm%d+\n",
			      cursor_wm, cursor->max_wm, level);
		return false;
	}

	if (!(fbc_wm || display_wm || cursor_wm)) {
		DRM_DEBUG_KMS("latency %d is 0, disabling wm%d+\n", level, level);
		return false;
	}

	return true;
}
/*
 * Compute watermark values of WM[1-3],
 *
 * Computes the FBC, display and cursor self-refresh watermarks of @level
 * for the crtc driving @plane at a latency of @latency_ns.
 *
 * Returns false (with all outputs zeroed) when @latency_ns is 0; otherwise
 * returns the verdict of ironlake_check_srwm() on the computed values.
 * No crtc activity check is made here: crtc->fb is dereferenced
 * unconditionally, so callers must only pass a plane with an active crtc.
 */
static bool ironlake_compute_srwm(struct drm_device *dev, int level, int plane,
				  int latency_ns,
				  const struct intel_watermark_params *display,
				  const struct intel_watermark_params *cursor,
				  int *fbc_wm, int *display_wm, int *cursor_wm)
{
	struct drm_crtc *crtc;
	unsigned long line_time_us;
	int hdisplay, htotal, pixel_size, clock;
	int line_count, line_size;
	int small, large;
	int entries;

	if (!latency_ns) {
		*fbc_wm = *display_wm = *cursor_wm = 0;
		return false;
	}

	crtc = intel_get_crtc_for_plane(dev, plane);
	hdisplay = crtc->mode.hdisplay;
	htotal = crtc->mode.htotal;
	clock = crtc->mode.clock;
	pixel_size = crtc->fb->bits_per_pixel / 8;

	/*
	 * The integer division truncates to 0 once clock exceeds
	 * htotal * 1000; keep the divisor used below at 1 or more.
	 */
	line_time_us = max(htotal * 1000 / clock, 1);
	line_count = (latency_ns / line_time_us + 1000) / 1000;
	line_size = hdisplay * pixel_size;

	/* Use the minimum of the small and large buffer method for primary */
	small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
	large = line_count * line_size;

	entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
	*display_wm = entries + display->guard_size;

	/*
	 * Spec says:
	 * FBC WM = ((Final Primary WM * 64) / number of bytes per line) + 2
	 */
	*fbc_wm = DIV_ROUND_UP(*display_wm * 64, line_size) + 2;

	/* calculate the self-refresh watermark for display cursor */
	entries = line_count * pixel_size * 64;
	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
	*cursor_wm = entries + cursor->guard_size;

	return ironlake_check_srwm(dev, level,
				   *fbc_wm, *display_wm, *cursor_wm,
				   display, cursor);
}
/*
 * Program the Ironlake watermarks.
 *
 * WM0 is written per pipe for every active crtc.  The LP registers
 * (WM3/WM2/WM1) are always cleared first; WM1 and then WM2 are
 * reprogrammed only when exactly one plane is active, stopping at the
 * first level whose computation fails.
 */
static void ironlake_update_wm(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int enabled = 0;
	int fbc_wm, plane_wm, cursor_wm;
	int plane;

	if (g4x_compute_wm0(dev, 0,
			    &ironlake_display_wm_info,
			    ILK_LP0_PLANE_LATENCY,
			    &ironlake_cursor_wm_info,
			    ILK_LP0_CURSOR_LATENCY,
			    &plane_wm, &cursor_wm)) {
		I915_WRITE(WM0_PIPEA_ILK,
			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
		DRM_DEBUG_KMS("FIFO watermarks For pipe A - plane %d, cursor: %d\n",
			      plane_wm, cursor_wm);
		enabled |= 1;
	}

	if (g4x_compute_wm0(dev, 1,
			    &ironlake_display_wm_info,
			    ILK_LP0_PLANE_LATENCY,
			    &ironlake_cursor_wm_info,
			    ILK_LP0_CURSOR_LATENCY,
			    &plane_wm, &cursor_wm)) {
		I915_WRITE(WM0_PIPEB_ILK,
			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
		DRM_DEBUG_KMS("FIFO watermarks For pipe B - plane %d, cursor: %d\n",
			      plane_wm, cursor_wm);
		enabled |= 2;
	}

	/*
	 * Calculate and update the self-refresh watermark only when one
	 * display plane is used.
	 */
	I915_WRITE(WM3_LP_ILK, 0);
	I915_WRITE(WM2_LP_ILK, 0);
	I915_WRITE(WM1_LP_ILK, 0);

	if (!single_plane_enabled(enabled))
		return;

	/* Index of the single active plane */
	plane = ffs(enabled) - 1;

	/* WM1 */
	if (!ironlake_compute_srwm(dev, 1, plane,
				   ILK_READ_WM1_LATENCY() * 500,
				   &ironlake_display_srwm_info,
				   &ironlake_cursor_srwm_info,
				   &fbc_wm, &plane_wm, &cursor_wm))
		return;

	I915_WRITE(WM1_LP_ILK,
		   WM1_LP_SR_EN |
		   (ILK_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
		   (fbc_wm << WM1_LP_FBC_SHIFT) |
		   (plane_wm << WM1_LP_SR_SHIFT) |
		   cursor_wm);

	/* WM2 */
	if (!ironlake_compute_srwm(dev, 2, plane,
				   ILK_READ_WM2_LATENCY() * 500,
				   &ironlake_display_srwm_info,
				   &ironlake_cursor_srwm_info,
				   &fbc_wm, &plane_wm, &cursor_wm))
		return;

	I915_WRITE(WM2_LP_ILK,
		   WM2_LP_EN |
		   (ILK_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
		   (fbc_wm << WM1_LP_FBC_SHIFT) |
		   (plane_wm << WM1_LP_SR_SHIFT) |
		   cursor_wm);

	/*
	 * WM3 is unsupported on ILK, probably because we don't have latency
	 * data for that power state
	 */
}
/*
 * Program the Sandybridge watermarks.
 *
 * WM0 plane/cursor fields are updated per pipe for every active crtc.
 * The LP registers are always cleared first and are reprogrammed
 * (WM1, WM2, WM3 in that order) only when exactly one plane is active and
 * sprite scaling is off, stopping at the first level whose computation
 * fails.
 */
static void sandybridge_update_wm(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int latency = SNB_READ_WM0_LATENCY() * 100;	/* In unit 0.1us */
	unsigned int enabled = 0;
	int fbc_wm, plane_wm, cursor_wm;
	int plane;
	u32 wm0;

	if (g4x_compute_wm0(dev, 0,
			    &sandybridge_display_wm_info, latency,
			    &sandybridge_cursor_wm_info, latency,
			    &plane_wm, &cursor_wm)) {
		wm0 = I915_READ(WM0_PIPEA_ILK);
		wm0 &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
		I915_WRITE(WM0_PIPEA_ILK, wm0 |
			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
		DRM_DEBUG_KMS("FIFO watermarks For pipe A - plane %d, cursor: %d\n",
			      plane_wm, cursor_wm);
		enabled |= 1;
	}

	if (g4x_compute_wm0(dev, 1,
			    &sandybridge_display_wm_info, latency,
			    &sandybridge_cursor_wm_info, latency,
			    &plane_wm, &cursor_wm)) {
		wm0 = I915_READ(WM0_PIPEB_ILK);
		wm0 &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
		I915_WRITE(WM0_PIPEB_ILK, wm0 |
			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
		DRM_DEBUG_KMS("FIFO watermarks For pipe B - plane %d, cursor: %d\n",
			      plane_wm, cursor_wm);
		enabled |= 2;
	}

	/*
	 * Calculate and update the self-refresh watermark only when one
	 * display plane is used.
	 *
	 * SNB support 3 levels of watermark.
	 *
	 * WM1/WM2/WM3 watermarks have to be enabled in the ascending order,
	 * and disabled in the descending order
	 *
	 */
	I915_WRITE(WM3_LP_ILK, 0);
	I915_WRITE(WM2_LP_ILK, 0);
	I915_WRITE(WM1_LP_ILK, 0);

	if (!single_plane_enabled(enabled) ||
	    dev_priv->sprite_scaling_enabled)
		return;

	/* Index of the single active plane */
	plane = ffs(enabled) - 1;

	/* WM1 */
	if (!ironlake_compute_srwm(dev, 1, plane,
				   SNB_READ_WM1_LATENCY() * 500,
				   &sandybridge_display_srwm_info,
				   &sandybridge_cursor_srwm_info,
				   &fbc_wm, &plane_wm, &cursor_wm))
		return;

	I915_WRITE(WM1_LP_ILK,
		   WM1_LP_SR_EN |
		   (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
		   (fbc_wm << WM1_LP_FBC_SHIFT) |
		   (plane_wm << WM1_LP_SR_SHIFT) |
		   cursor_wm);

	/* WM2 */
	if (!ironlake_compute_srwm(dev, 2, plane,
				   SNB_READ_WM2_LATENCY() * 500,
				   &sandybridge_display_srwm_info,
				   &sandybridge_cursor_srwm_info,
				   &fbc_wm, &plane_wm, &cursor_wm))
		return;

	I915_WRITE(WM2_LP_ILK,
		   WM2_LP_EN |
		   (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
		   (fbc_wm << WM1_LP_FBC_SHIFT) |
		   (plane_wm << WM1_LP_SR_SHIFT) |
		   cursor_wm);

	/* WM3 */
	if (!ironlake_compute_srwm(dev, 3, plane,
				   SNB_READ_WM3_LATENCY() * 500,
				   &sandybridge_display_srwm_info,
				   &sandybridge_cursor_srwm_info,
				   &fbc_wm, &plane_wm, &cursor_wm))
		return;

	I915_WRITE(WM3_LP_ILK,
		   WM3_LP_EN |
		   (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) |
		   (fbc_wm << WM1_LP_FBC_SHIFT) |
		   (plane_wm << WM1_LP_SR_SHIFT) |
		   cursor_wm);
}
/*
 * Program the Ivybridge watermarks.
 *
 * WM0 plane/cursor fields are updated per pipe (A, B, C) for every active
 * crtc; "enabled" collects one bit per active pipe.  The LP registers are
 * always cleared first and are reprogrammed (WM1, WM2, WM3 in that order)
 * only when exactly one plane is active and sprite scaling is off,
 * stopping at the first level whose computation fails.  For WM3 the
 * cursor value comes from a second computation at twice the WM3 latency.
 */
static void ivybridge_update_wm(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int latency = SNB_READ_WM0_LATENCY() * 100;	/* In unit 0.1us */
	u32 val;
	int fbc_wm, plane_wm, cursor_wm;
	int ignore_fbc_wm, ignore_plane_wm, ignore_cursor_wm;
	unsigned int enabled;

	enabled = 0;
	if (g4x_compute_wm0(dev, 0,
			    &sandybridge_display_wm_info, latency,
			    &sandybridge_cursor_wm_info, latency,
			    &plane_wm, &cursor_wm)) {
		val = I915_READ(WM0_PIPEA_ILK);
		val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
		I915_WRITE(WM0_PIPEA_ILK, val |
			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
		DRM_DEBUG_KMS("FIFO watermarks For pipe A -"
			      " plane %d, " "cursor: %d\n",
			      plane_wm, cursor_wm);
		enabled |= 1 << 0;
	}

	if (g4x_compute_wm0(dev, 1,
			    &sandybridge_display_wm_info, latency,
			    &sandybridge_cursor_wm_info, latency,
			    &plane_wm, &cursor_wm)) {
		val = I915_READ(WM0_PIPEB_ILK);
		val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
		I915_WRITE(WM0_PIPEB_ILK, val |
			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
		DRM_DEBUG_KMS("FIFO watermarks For pipe B -"
			      " plane %d, cursor: %d\n",
			      plane_wm, cursor_wm);
		enabled |= 1 << 1;
	}

	if (g4x_compute_wm0(dev, 2,
			    &sandybridge_display_wm_info, latency,
			    &sandybridge_cursor_wm_info, latency,
			    &plane_wm, &cursor_wm)) {
		val = I915_READ(WM0_PIPEC_IVB);
		val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
		I915_WRITE(WM0_PIPEC_IVB, val |
			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
		DRM_DEBUG_KMS("FIFO watermarks For pipe C -"
			      " plane %d, cursor: %d\n",
			      plane_wm, cursor_wm);
		/*
		 * Pipe C needs its own bit.  OR-ing in 3 would alias the
		 * pipe A and B bits, so a pipe-C-only configuration would
		 * never pass single_plane_enabled() and ffs() below could
		 * never yield plane 2.
		 */
		enabled |= 1 << 2;
	}

	/*
	 * Calculate and update the self-refresh watermark only when one
	 * display plane is used.
	 *
	 * SNB support 3 levels of watermark.
	 *
	 * WM1/WM2/WM3 watermarks have to be enabled in the ascending order,
	 * and disabled in the descending order
	 *
	 */
	I915_WRITE(WM3_LP_ILK, 0);
	I915_WRITE(WM2_LP_ILK, 0);
	I915_WRITE(WM1_LP_ILK, 0);

	if (!single_plane_enabled(enabled) ||
	    dev_priv->sprite_scaling_enabled)
		return;
	/* From here on "enabled" holds the index of the single active plane */
	enabled = ffs(enabled) - 1;

	/* WM1 */
	if (!ironlake_compute_srwm(dev, 1, enabled,
				   SNB_READ_WM1_LATENCY() * 500,
				   &sandybridge_display_srwm_info,
				   &sandybridge_cursor_srwm_info,
				   &fbc_wm, &plane_wm, &cursor_wm))
		return;

	I915_WRITE(WM1_LP_ILK,
		   WM1_LP_SR_EN |
		   (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
		   (fbc_wm << WM1_LP_FBC_SHIFT) |
		   (plane_wm << WM1_LP_SR_SHIFT) |
		   cursor_wm);

	/* WM2 */
	if (!ironlake_compute_srwm(dev, 2, enabled,
				   SNB_READ_WM2_LATENCY() * 500,
				   &sandybridge_display_srwm_info,
				   &sandybridge_cursor_srwm_info,
				   &fbc_wm, &plane_wm, &cursor_wm))
		return;

	I915_WRITE(WM2_LP_ILK,
		   WM2_LP_EN |
		   (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
		   (fbc_wm << WM1_LP_FBC_SHIFT) |
		   (plane_wm << WM1_LP_SR_SHIFT) |
		   cursor_wm);

	/* WM3, note we have to correct the cursor latency */
	if (!ironlake_compute_srwm(dev, 3, enabled,
				   SNB_READ_WM3_LATENCY() * 500,
				   &sandybridge_display_srwm_info,
				   &sandybridge_cursor_srwm_info,
				   &fbc_wm, &plane_wm, &ignore_cursor_wm) ||
	    !ironlake_compute_srwm(dev, 3, enabled,
				   2 * SNB_READ_WM3_LATENCY() * 500,
				   &sandybridge_display_srwm_info,
				   &sandybridge_cursor_srwm_info,
				   &ignore_fbc_wm, &ignore_plane_wm, &cursor_wm))
		return;

	I915_WRITE(WM3_LP_ILK,
		   WM3_LP_EN |
		   (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) |
		   (fbc_wm << WM1_LP_FBC_SHIFT) |
		   (plane_wm << WM1_LP_SR_SHIFT) |
		   cursor_wm);
}
/*
 * Update the linetime field of PIPE_WM_LINETIME(pipe) from @mode, leaving
 * the other bits of the register as read.
 */
static void
haswell_update_linetime_wm(struct drm_device *dev, int pipe,
			   struct drm_display_mode *mode)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 reg_val = I915_READ(PIPE_WM_LINETIME(pipe));

	reg_val &= ~PIPE_WM_LINETIME_MASK;

	/*
	 * The WM are computed based on how long it takes to fill a single
	 * row at the given clock rate, multiplied by 8.
	 */
	reg_val |= PIPE_WM_LINETIME_TIME(
		((mode->crtc_hdisplay * 1000) / mode->clock) * 8);

	/*
	 * IPS watermarks are only used by pipe A, and are ignored by
	 * pipes B and C.  They are calculated similarly to the common
	 * linetime values, except that we are using CD clock frequency
	 * in MHz instead of pixel rate for the division.
	 *
	 * This is a placeholder for the IPS watermark calculation code.
	 */

	I915_WRITE(PIPE_WM_LINETIME(pipe), reg_val);
}
/*
 * Compute the level-0 sprite watermark for the crtc driving @plane.
 *
 * Returns false, with *sprite_wm set to display->guard_size, when the
 * crtc is not active.  Returns true otherwise, with *sprite_wm limited to
 * display->max_wm.
 */
static bool
sandybridge_compute_sprite_wm(struct drm_device *dev, int plane,
			      uint32_t sprite_width, int pixel_size,
			      const struct intel_watermark_params *display,
			      int display_latency_ns, int *sprite_wm)
{
	struct drm_crtc *crtc = intel_get_crtc_for_plane(dev, plane);
	int dotclock;
	int fifo_entries, tlb_extra;

	if (!intel_crtc_active(crtc)) {
		*sprite_wm = display->guard_size;
		return false;
	}

	dotclock = crtc->mode.clock;

	/* Use the small buffer method to calculate the sprite watermark */
	fifo_entries = ((dotclock * pixel_size / 1000) * display_latency_ns) / 1000;
	tlb_extra = display->fifo_size*display->cacheline_size -
		sprite_width * 8;
	if (tlb_extra > 0)
		fifo_entries += tlb_extra;
	fifo_entries = DIV_ROUND_UP(fifo_entries, display->cacheline_size);

	*sprite_wm = fifo_entries + display->guard_size;
	if (*sprite_wm > (int)display->max_wm)
		*sprite_wm = display->max_wm;

	return true;
}
/*
 * Compute a self-refresh (LP) sprite watermark for the crtc driving
 * @plane at a latency of @latency_ns.
 *
 * Returns false with *sprite_wm set to 0 when the latency, the crtc clock
 * or the derived line time is zero.  Otherwise *sprite_wm holds the
 * computed value and the return value tells whether it is no larger than
 * 0x3ff.
 */
static bool
sandybridge_compute_sprite_srwm(struct drm_device *dev, int plane,
				uint32_t sprite_width, int pixel_size,
				const struct intel_watermark_params *display,
				int latency_ns, int *sprite_wm)
{
	struct drm_crtc *crtc;
	unsigned long line_time_us;
	int clock;
	int line_count, line_size;
	int small, large;
	int entries;

	if (!latency_ns)
		goto no_wm;

	crtc = intel_get_crtc_for_plane(dev, plane);
	clock = crtc->mode.clock;
	if (!clock)
		goto no_wm;

	line_time_us = (sprite_width * 1000) / clock;
	if (!line_time_us)
		goto no_wm;

	line_count = (latency_ns / line_time_us + 1000) / 1000;
	line_size = sprite_width * pixel_size;

	/* Use the minimum of the small and large buffer method */
	small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
	large = line_count * line_size;

	entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
	*sprite_wm = entries + display->guard_size;

	return *sprite_wm <= 0x3ff;

no_wm:
	*sprite_wm = 0;
	return false;
}
/*
 * Program the sprite watermarks for @pipe.
 *
 * The sprite field of the pipe's WM0 register is updated first, then
 * WM1S_LP_ILK.  On Ivybridge WM2S_LP_IVB and WM3S_LP_IVB follow.  The
 * function stops at the first level whose computation fails.
 */
static void sandybridge_update_sprite_wm(struct drm_device *dev, int pipe,
					 uint32_t sprite_width, int pixel_size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int latency = SNB_READ_WM0_LATENCY() * 100;	/* In unit 0.1us */
	int sprite_wm, reg;
	u32 wm0;

	switch (pipe) {
	case 0:
		reg = WM0_PIPEA_ILK;
		break;
	case 1:
		reg = WM0_PIPEB_ILK;
		break;
	case 2:
		reg = WM0_PIPEC_IVB;
		break;
	default:
		return; /* bad pipe */
	}

	/* WM0 sprite watermark */
	if (!sandybridge_compute_sprite_wm(dev, pipe, sprite_width, pixel_size,
					   &sandybridge_display_wm_info,
					   latency, &sprite_wm)) {
		DRM_DEBUG_KMS("failed to compute sprite wm for pipe %c\n",
			      pipe_name(pipe));
		return;
	}

	wm0 = I915_READ(reg);
	wm0 &= ~WM0_PIPE_SPRITE_MASK;
	I915_WRITE(reg, wm0 | (sprite_wm << WM0_PIPE_SPRITE_SHIFT));
	DRM_DEBUG_KMS("sprite watermarks For pipe %c - %d\n", pipe_name(pipe), sprite_wm);

	/* LP1 sprite watermark */
	if (!sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
					     pixel_size,
					     &sandybridge_display_srwm_info,
					     SNB_READ_WM1_LATENCY() * 500,
					     &sprite_wm)) {
		DRM_DEBUG_KMS("failed to compute sprite lp1 wm on pipe %c\n",
			      pipe_name(pipe));
		return;
	}
	I915_WRITE(WM1S_LP_ILK, sprite_wm);

	/* Only IVB has two more LP watermarks for sprite */
	if (!IS_IVYBRIDGE(dev))
		return;

	/* LP2 sprite watermark */
	if (!sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
					     pixel_size,
					     &sandybridge_display_srwm_info,
					     SNB_READ_WM2_LATENCY() * 500,
					     &sprite_wm)) {
		DRM_DEBUG_KMS("failed to compute sprite lp2 wm on pipe %c\n",
			      pipe_name(pipe));
		return;
	}
	I915_WRITE(WM2S_LP_IVB, sprite_wm);

	/* LP3 sprite watermark */
	if (!sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
					     pixel_size,
					     &sandybridge_display_srwm_info,
					     SNB_READ_WM3_LATENCY() * 500,
					     &sprite_wm)) {
		DRM_DEBUG_KMS("failed to compute sprite lp3 wm on pipe %c\n",
			      pipe_name(pipe));
		return;
	}
	I915_WRITE(WM3S_LP_IVB, sprite_wm);
}
/**
 * intel_update_watermarks - update FIFO watermark values based on current modes
 *
 * Calculate watermark values for the various WM regs based on current mode
 * and plane configuration.
 *
 * There are several cases to deal with here:
 *   - normal (i.e. non-self-refresh)
 *   - self-refresh (SR) mode
 *   - lines are large relative to FIFO size (buffer can hold up to 2)
 *   - lines are small relative to FIFO size (buffer can hold more than 2
 *     lines), so need to account for TLB latency
 *
 *   The normal calculation is:
 *     watermark = dotclock * bytes per pixel * latency
 *   where latency is platform & configuration dependent (we assume pessimal
 *   values here).
 *
 *   The SR calculation is:
 *     watermark = (trunc(latency/line time)+1) * surface width *
 *       bytes per pixel
 *   where
 *     line time = htotal / dotclock
 *     surface width = hdisplay for normal plane and 64 for cursor
 *   and latency is assumed to be high, as above.
 *
 * The final value programmed to the register should always be rounded up,
 * and include an extra 2 entries to account for clock crossings.
 *
 * We don't use the sprite, so we can ignore that.  And on Crestline we have
 * to set the non-SR watermarks to 8.
 *
 * The work is delegated to the display.update_wm hook; nothing happens
 * when that hook is not set.
 */
void intel_update_watermarks(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!dev_priv->display.update_wm)
		return;

	dev_priv->display.update_wm(dev);
}
/*
 * Forward a linetime watermark update for @pipe and @mode to the
 * display.update_linetime_wm hook; a no-op when the hook is not set.
 */
void intel_update_linetime_watermarks(struct drm_device *dev,
		int pipe, struct drm_display_mode *mode)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!dev_priv->display.update_linetime_wm)
		return;

	dev_priv->display.update_linetime_wm(dev, pipe, mode);
}
/*
 * Forward a sprite watermark update for @pipe to the
 * display.update_sprite_wm hook; a no-op when the hook is not set.
 */
void intel_update_sprite_watermarks(struct drm_device *dev, int pipe,
				    uint32_t sprite_width, int pixel_size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!dev_priv->display.update_sprite_wm)
		return;

	dev_priv->display.update_sprite_wm(dev, pipe, sprite_width,
					   pixel_size);
}
/*
 * Allocate a 4096-byte GEM object for use as a power context, pin it and
 * move it to the GTT domain.
 *
 * Warns if dev->struct_mutex is not held.  Returns the object on success
 * or NULL on failure; on failure everything acquired so far is released.
 */
static struct drm_i915_gem_object *
intel_alloc_context_page(struct drm_device *dev)
{
	struct drm_i915_gem_object *page;
	int err;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	page = i915_gem_alloc_object(dev, 4096);
	if (page == NULL) {
		DRM_DEBUG("failed to alloc power context, RC6 disabled\n");
		return NULL;
	}

	err = i915_gem_object_pin(page, 4096, true, false);
	if (err != 0) {
		DRM_ERROR("failed to pin power context: %d\n", err);
		goto err_unref;
	}

	err = i915_gem_object_set_to_gtt_domain(page, 1);
	if (err == 0)
		return page;

	/* set-domain failed: undo the pin, then drop the reference */
	DRM_ERROR("failed to set-domain on power context: %d\n", err);
	i915_gem_object_unpin(page);
err_unref:
	drm_gem_object_unreference(&page->base);
	return NULL;
}
/**
 * Lock protecting IPS related data structures
 *
 * ironlake_enable_drps() takes it with spin_lock_irq() and
 * ironlake_set_drps() asserts that it is held.
 */
DEFINE_SPINLOCK(mchdev_lock);
/* Global for IPS driver to get at the current i915 device. Protected by
 * mchdev_lock. */
static struct drm_i915_private *i915_mch_dev;
/*
 * Issue a frequency change command for @val through MEMSWCTL.
 *
 * Must be called with mchdev_lock held.  Returns false without writing
 * anything if MEMCTL_CMD_STS is still set in MEMSWCTL (a previous command
 * is pending); returns true once the command has been written.
 */
bool ironlake_set_drps(struct drm_device *dev, u8 val)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u16 swctl;

	assert_spin_locked(&mchdev_lock);

	swctl = I915_READ16(MEMSWCTL);
	if ((swctl & MEMCTL_CMD_STS) != 0) {
		DRM_DEBUG("gpu busy, RCS change rejected\n");
		return false; /* still busy with another command */
	}

	/* Write the command first, then write it again with the STS bit set */
	swctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
	I915_WRITE16(MEMSWCTL, swctl);
	POSTING_READ16(MEMSWCTL);

	swctl |= MEMCTL_CMD_STS;
	I915_WRITE16(MEMSWCTL, swctl);

	return true;
}
/*
 * Set up Ironlake DRPS.
 *
 * Under mchdev_lock this programs the evaluation interval and threshold
 * registers, derives fmax/fmin/fstart from MEMMODECTL and records them in
 * dev_priv->ips, switches MEMMODECTL to software mode, requests the start
 * frequency via ironlake_set_drps() and finally snapshots the counters and
 * timestamps kept in dev_priv->ips.last_*.
 */
static void ironlake_enable_drps(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
/* NOTE: MEMMODECTL is sampled here, before mchdev_lock is taken */
u32 rgvmodectl = I915_READ(MEMMODECTL);
u8 fmax, fmin, fstart, vstart;
spin_lock_irq(&mchdev_lock);
/* Enable temp reporting */
I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
/* 100ms RC evaluation intervals */
I915_WRITE(RCUPEI, 100000);
I915_WRITE(RCDNEI, 100000);
/* Set max/min thresholds to 90ms and 80ms respectively */
I915_WRITE(RCBMAXAVG, 90000);
I915_WRITE(RCBMINAVG, 80000);
I915_WRITE(MEMIHYST, 1);
/* Set up min, max, and cur for interrupt handling */
fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
MEMMODE_FSTART_SHIFT;
/* vstart is read from the PXVFREQ entry indexed by fstart */
vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >>
PXVFREQ_PX_SHIFT;
dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
dev_priv->ips.fstart = fstart;
/* max_delay and cur_delay both start out at fstart */
dev_priv->ips.max_delay = fstart;
dev_priv->ips.min_delay = fmin;
dev_priv->ips.cur_delay = fstart;
DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
fmax, fmin, fstart);
I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
/*
* Interrupts will be enabled in ironlake_irq_postinstall
*/
I915_WRITE(VIDSTART, vstart);
POSTING_READ(VIDSTART);
rgvmodectl |= MEMMODE_SWMODE_EN;
I915_WRITE(MEMMODECTL, rgvmodectl);
/* Wait up to the given timeout for MEMCTL_CMD_STS to clear */
if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
DRM_ERROR("stuck trying to change perf mode\n");
mdelay(1);
/* The return value (busy/rejected) is not checked here */
ironlake_set_drps(dev, fstart);
/* Baseline counter and time snapshots stored in dev_priv->ips */
dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) +
I915_READ(0x112e0);
dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
dev_priv->ips.last_count2 = I915_READ(0x112f4);
getrawmonotonic(&dev_priv->ips.last_time2);
spin_unlock_irq(&mchdev_lock);
}
/*
 * Undo ironlake_enable_drps(): disable and ack the evaluation-change
 * interrupt, mask the PCU event in the display interrupt registers and
 * request the starting frequency again. Runs entirely under mchdev_lock.
 */
static void ironlake_disable_drps(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
u16 rgvswctl;
spin_lock_irq(&mchdev_lock);
/* Sampled before the frequency request below; written back at the end. */
rgvswctl = I915_READ16(MEMSWCTL);
/* Ack interrupts, disable EFC interrupt */
I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
I915_WRITE(DEIIR, DE_PCU_EVENT);
I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
/* Go back to the starting frequency */
ironlake_set_drps(dev, dev_priv->ips.fstart);
mdelay(1);
/*
 * NOTE(review): this writes the value sampled at the top of the function
 * (plus the status bit) with a 32-bit write, while the rest of the file
 * accesses MEMSWCTL with 16-bit accessors; confirm this is intended.
 */
rgvswctl |= MEMCTL_CMD_STS;
I915_WRITE(MEMSWCTL, rgvswctl);
mdelay(1);
spin_unlock_irq(&mchdev_lock);
}
/*
 * The hardware returns all zeroes when GEN6_RP_INTERRUPT_LIMITS is read back,
 * so a read-modify-write could clear every limit and leave the GPU stuck at
 * its current frequency. The register value is therefore always computed from
 * scratch here.
 *
 * @val is clamped in place to [rps.min_delay, rps.max_delay]. The returned
 * value always carries the upper limit (bits 24 and up); the lower limit
 * (bits 16 and up) is added only once @val has reached the minimum.
 */
static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 *val)
{
	u32 limits = dev_priv->rps.max_delay << 24;

	if (*val >= dev_priv->rps.max_delay)
		*val = dev_priv->rps.max_delay;

	/*
	 * Only program the down limit when we are already at the lowest
	 * level, so that no further down interrupts arrive; otherwise leave
	 * it clear. This avoids a hardware race when leaving rc6: for a tiny
	 * window the hw runs at the minimal clock before selecting the
	 * desired frequency, and a down threshold expiring in that window
	 * would not raise a down interrupt.
	 */
	if (*val <= dev_priv->rps.min_delay) {
		*val = dev_priv->rps.min_delay;
		limits |= dev_priv->rps.min_delay << 16;
	}

	return limits;
}
/*
 * Request GPU frequency @val on gen6+ and reprogram the interrupt limits.
 *
 * @val is first clamped by gen6_rps_limits(). Nothing is written when the
 * clamped value equals the cached rps.cur_delay. A WARN is raised if
 * rps.hw_lock is not held.
 */
void gen6_set_rps(struct drm_device *dev, u8 val)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u32 limits = gen6_rps_limits(dev_priv, &val);
	u32 request;

	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
	WARN_ON(val > dev_priv->rps.max_delay);
	WARN_ON(val < dev_priv->rps.min_delay);

	if (val == dev_priv->rps.cur_delay)
		return;

	/* Haswell uses a different encoding for the software request. */
	if (IS_HASWELL(dev))
		request = HSW_FREQUENCY(val);
	else
		request = GEN6_FREQUENCY(val) |
			  GEN6_OFFSET(0) |
			  GEN6_AGGRESSIVE_TURBO;

	I915_WRITE(GEN6_RPNSWREQ, request);

	/*
	 * Make sure we continue to get interrupts until we hit the minimum
	 * or maximum frequencies.
	 */
	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits);

	POSTING_READ(GEN6_RPNSWREQ);

	dev_priv->rps.cur_delay = val;

	/* The trace point takes val * 50. */
	trace_intel_gpu_freq_change(val * 50);
}
/*
 * Request GPU frequency @val from the Valleyview Punit, poll the status
 * register until bit 0 clears (or roughly 10ms elapse), then reprogram the
 * RPS interrupt limits and cache the frequency the Punit reports.
 *
 * @val is clamped by gen6_rps_limits() first. A WARN is raised if
 * rps.hw_lock is not held.
 */
void valleyview_set_rps(struct drm_device *dev, u8 val)
{
struct drm_i915_private *dev_priv = dev->dev_private;
unsigned long timeout = jiffies + msecs_to_jiffies(10);
u32 limits = gen6_rps_limits(dev_priv, &val);
u32 pval;
WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
WARN_ON(val > dev_priv->rps.max_delay);
WARN_ON(val < dev_priv->rps.min_delay);
DRM_DEBUG_DRIVER("gpu freq request from %d to %d\n",
vlv_gpu_freq(dev_priv->mem_freq,
dev_priv->rps.cur_delay),
vlv_gpu_freq(dev_priv->mem_freq, val));
if (val == dev_priv->rps.cur_delay)
return;
valleyview_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
/* Poll the status register until bit 0 drops or the timeout passes. */
do {
valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &pval);
if (time_after(jiffies, timeout)) {
DRM_DEBUG_DRIVER("timed out waiting for Punit\n");
break;
}
udelay(10);
} while (pval & 1);
/* Re-read the status; bits 8 and up carry the frequency actually granted. */
valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &pval);
if ((pval >> 8) != val)
DRM_DEBUG_DRIVER("punit overrode freq: %d requested, but got %d\n",
val, pval >> 8);
/* Make sure we continue to get interrupts
* until we hit the minimum or maximum frequencies.
*/
I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits);
/* Cache what the Punit reported, which may differ from the request. */
dev_priv->rps.cur_delay = pval >> 8;
/* NOTE(review): the trace uses the requested val, not the granted pval >> 8. */
trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv->mem_freq, val));
}
/*
 * Turn off RC6 and RPS on gen6+: clear GEN6_RC_CONTROL, write bit 31 to
 * GEN6_RPNSWREQ, mask and disable the PM interrupts, then discard any
 * pending PM interrupt state (software copy and GEN6_PMIIR).
 */
static void gen6_disable_rps(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
I915_WRITE(GEN6_RC_CONTROL, 0);
I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
I915_WRITE(GEN6_PMIER, 0);
/* Complete PM interrupt masking here doesn't race with the rps work
* item again unmasking PM interrupts because that is using a different
* register (PMIMR) to mask PM interrupts. The only risk is in leaving
* stale bits in PMIIR and PMIMR which gen6_enable_rps will clean up. */
spin_lock_irq(&dev_priv->rps.lock);
dev_priv->rps.pm_iir = 0;
spin_unlock_irq(&dev_priv->rps.lock);
/* Writing back the value just read acks whatever is currently pending. */
I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
}
/*
 * Valleyview counterpart of gen6_disable_rps(): identical except that it
 * does not write GEN6_RPNSWREQ.
 */
static void valleyview_disable_rps(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
I915_WRITE(GEN6_RC_CONTROL, 0);
I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
I915_WRITE(GEN6_PMIER, 0);
/* Complete PM interrupt masking here doesn't race with the rps work
* item again unmasking PM interrupts because that is using a different
* register (PMIMR) to mask PM interrupts. The only risk is in leaving
* stale bits in PMIIR and PMIMR which gen6_enable_rps will clean up. */
/* NOTE(review): the comment above matches gen6_disable_rps() word for word;
 * on this platform the enable path is presumably valleyview_enable_rps(). */
spin_lock_irq(&dev_priv->rps.lock);
dev_priv->rps.pm_iir = 0;
spin_unlock_irq(&dev_priv->rps.lock);
/* Writing back the value just read acks whatever is currently pending. */
I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
}
int intel_enable_rc6(const struct drm_device *dev)
{
/* Respect the kernel parameter if it is set */
if (i915_enable_rc6 >= 0)
return i915_enable_rc6;
Revert "drm/i915: enable rc6 on ilk again" Even with the cumulative set of ilk w/a, rc6 is demonstrably still failing and causing GPU hangs as found by Peter Wu. So we need to disable it again until it is stable. This reverts commit 456470eb583f063ee84c6818251e638598be0fb8 Author: Daniel Vetter <daniel.vetter@ffwll.ch> Date: Wed Aug 8 23:35:40 2012 +0200 drm/i915: enable rc6 on ilk again and the follow-on commit cd7988eea561a70a4f98e431c1395f913672d626 Author: Daniel Vetter <daniel.vetter@ffwll.ch> Date: Sun Aug 26 20:33:18 2012 +0200 drm/i915: disable rc6 on ilk when vt-d is enabled Note: The situation around the gen4/5 gpu hangs that cropped up in 3.7 is rather strange. Most useful bisects have lead to commit 6c085a728cf000ac1865d66f8c9b52935558b328 Author: Chris Wilson <chris@chris-wilson.co.uk> Date: Mon Aug 20 11:40:46 2012 +0200 drm/i915: Track unbound pages or even later commits that affect the gem bo recycling, which all is way past the point where we re-enabled rc6. But somehow reverting/disabling those commits doesn't help, but disabling rc6 at least helps for many hangs on ilk. Obviously it doesn't change anything at all on gen4, and there are still strange issues left on gen5 (which we unfortunately can't readily reproduce). Also, the error_state signature of the hangs which can be fixed with this patch look remarkably different to those which seem to be unaffected by the rc6 settings: The rc6 hangs are in the ring, somewhere in the MI_FLUSH/PIPE_CONTROL sequence to make ilk coherent, wheras all the other hangs tend to be at a random point in the middle of the user batch. So it could also be that we have different issues. Until we grow more clue, this at least helps some users. Reported-by: Peter Wu <lekensteyn@gmail.com> References: https://bugs.freedesktop.org/show_bug.cgi?id=55984 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Added note with some more details about the gen4/5 3.7 gpu hang regression.] 
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-11-10 17:00:06 +07:00
/* Disable RC6 on Ironlake */
if (INTEL_INFO(dev)->gen == 5)
return 0;
if (IS_HASWELL(dev)) {
DRM_DEBUG_DRIVER("Haswell: only RC6 available\n");
return INTEL_RC6_ENABLE;
}
/* snb/ivb have more than one rc6 state. */
if (INTEL_INFO(dev)->gen == 6) {
DRM_DEBUG_DRIVER("Sandybridge: deep RC6 disabled\n");
return INTEL_RC6_ENABLE;
}
DRM_DEBUG_DRIVER("RC6 and deep RC6 enabled\n");
return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
}
static void gen6_enable_rps(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_ring_buffer *ring;
u32 rp_state_cap;
u32 gt_perf_status;
u32 rc6vids, pcu_mbox, rc6_mask = 0;
u32 gtfifodbg;
int rc6_mode;
int i, ret;
WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
/* Here begins a magic sequence of register writes to enable
* auto-downclocking.
*
* Perhaps there might be some value in exposing these to
* userspace...
*/
I915_WRITE(GEN6_RC_STATE, 0);
/* Clear the DBG now so we don't confuse earlier errors */
if ((gtfifodbg = I915_READ(GTFIFODBG))) {
DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
I915_WRITE(GTFIFODBG, gtfifodbg);
}
gen6_gt_force_wake_get(dev_priv);
rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
/* In units of 50MHz */
dev_priv->rps.hw_max = dev_priv->rps.max_delay = rp_state_cap & 0xff;
dev_priv->rps.min_delay = (rp_state_cap & 0xff0000) >> 16;
dev_priv->rps.cur_delay = 0;
/* disable the counters and set deterministic thresholds */
I915_WRITE(GEN6_RC_CONTROL, 0);
I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
for_each_ring(ring, dev_priv, i)
I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
I915_WRITE(GEN6_RC_SLEEP, 0);
I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
/* Check if we are enabling RC6 */
rc6_mode = intel_enable_rc6(dev_priv->dev);
if (rc6_mode & INTEL_RC6_ENABLE)
rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
/* We don't use those on Haswell */
if (!IS_HASWELL(dev)) {
if (rc6_mode & INTEL_RC6p_ENABLE)
rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
if (rc6_mode & INTEL_RC6pp_ENABLE)
rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
}
DRM_INFO("Enabling RC6 states: RC6 %s, RC6p %s, RC6pp %s\n",
(rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
(rc6_mask & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
(rc6_mask & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
I915_WRITE(GEN6_RC_CONTROL,
rc6_mask |
GEN6_RC_CTL_EI_MODE(1) |
GEN6_RC_CTL_HW_ENABLE);
if (IS_HASWELL(dev)) {
I915_WRITE(GEN6_RPNSWREQ,
HSW_FREQUENCY(10));
I915_WRITE(GEN6_RC_VIDEO_FREQ,
HSW_FREQUENCY(12));
} else {
I915_WRITE(GEN6_RPNSWREQ,
GEN6_FREQUENCY(10) |
GEN6_OFFSET(0) |
GEN6_AGGRESSIVE_TURBO);
I915_WRITE(GEN6_RC_VIDEO_FREQ,
GEN6_FREQUENCY(12));
}
I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
dev_priv->rps.max_delay << 24 |
dev_priv->rps.min_delay << 16);
drm/i915: use hsw rps tuning values everywhere on gen6+ James Bottomley reported [1] a massive power regression, due to the enabling of semaphores by default in 3.5. A workaround for him is to again disable semaphores. And indeed, his system has a very hard time to enter rc6 with semaphores enabled. Ben Widawsky run around with a kill-a-watt a lot and noticed: - There are indeed a few rare systems that seem to have a hard time entering rc6 when desktop-idle. - One machine, The Indestructible Toshiba regressed in this behaviour between 3.5 and 3.6 in a merge commit! So rc6 behaviour with the current setting seems to be highly timing dependent and not robust at all. - The behaviour James reported wrt semaphores seems to be a freak timing thing that only happens on his specific machine, confirming that enabling semaphores shouldn't reduce rc6 residency. Now furthermore the Google ChromeOS guys reported [2] a while ago that at least on some machines a simply a blinking cursor can keep the gpu turbo at the highest frequency. This is because the current rps limits used on snb/ivb are highly asymmetric. On the theory that gpu turbo and rc6 tuning values are related, we've tried whether the much saner looking (since much less asymmetric) rps tuning values used for hsw would also help entering rc6 more robustly. And it seems to mostly work, and we don't really have the resources to through-roughly tune things in any better way: The values from the ChromeOS ppl seem to fare a bit worse for James' machine, so I guess we better stick with something vpg (the gpu hw/windows group) provided, hoping that they've done their jobs. Reference[1]: http://lists.freedesktop.org/archives/dri-devel/2012-July/025675.html Reference[2]: http://lists.freedesktop.org/archives/intel-gfx/2012-July/018692.html Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=53393 Tested-by: Ben Widawsky <ben@bwidawsk.net> Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-08-15 15:41:45 +07:00
I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
I915_WRITE(GEN6_RP_UP_EI, 66000);
I915_WRITE(GEN6_RP_DOWN_EI, 350000);
I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
I915_WRITE(GEN6_RP_CONTROL,
GEN6_RP_MEDIA_TURBO |
GEN6_RP_MEDIA_HW_NORMAL_MODE |
GEN6_RP_MEDIA_IS_GFX |
GEN6_RP_ENABLE |
GEN6_RP_UP_BUSY_AVG |
(IS_HASWELL(dev) ? GEN7_RP_DOWN_IDLE_AVG : GEN6_RP_DOWN_IDLE_CONT));
ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
if (!ret && (IS_GEN6(dev) || IS_IVYBRIDGE(dev))) {
pcu_mbox = 0;
ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
drm/i915: Correct sandybrige overclocking Change the gen6+ max delay if the pcode read was successful (not the inverse). The previous code was all sorts of wrong and has existed since I broke it: commit 42c0526c930523425ff6edc95b7235ce7ab9308d Author: Ben Widawsky <ben@bwidawsk.net> Date: Wed Sep 26 10:34:00 2012 -0700 drm/i915: Extract PCU communication I added some parentheses for clarity, and I also corrected the debug message message to use the mask (wrong before I came along) and added a print to show the value we're changing from. Looking over the code, I'm not actually sure what we're trying to do. I introduced the bug simply by extracting the function not implementing anything new. We already set max_delay based on the capabilities register (which is what we use elsewhere to determine min and max). This would potentially increase it, I suppose? Jesse, I can't find the document which explains the definitions of the pcode commands, maybe you have it around. Based on Jesse's response, this could potentially be for -fixes, or stable, or maybe lead to us dropping it entirely. As the current code is is, things won't completely break because of the aforementioned capabilities register, and in my experimentation, enabling this has no effect, it goes from 1100->1100. I found this while reviewing Jesse's VLV patches. Cc: Jesse Barnes <jbarnes@virtuousgeek.org> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> [danvet: Bikeshed-away the redudant parens spotted by Chris Wilson.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-03-20 10:19:56 +07:00
if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
drm/i915: Correct sandybrige overclocking Change the gen6+ max delay if the pcode read was successful (not the inverse). The previous code was all sorts of wrong and has existed since I broke it: commit 42c0526c930523425ff6edc95b7235ce7ab9308d Author: Ben Widawsky <ben@bwidawsk.net> Date: Wed Sep 26 10:34:00 2012 -0700 drm/i915: Extract PCU communication I added some parentheses for clarity, and I also corrected the debug message message to use the mask (wrong before I came along) and added a print to show the value we're changing from. Looking over the code, I'm not actually sure what we're trying to do. I introduced the bug simply by extracting the function not implementing anything new. We already set max_delay based on the capabilities register (which is what we use elsewhere to determine min and max). This would potentially increase it, I suppose? Jesse, I can't find the document which explains the definitions of the pcode commands, maybe you have it around. Based on Jesse's response, this could potentially be for -fixes, or stable, or maybe lead to us dropping it entirely. As the current code is is, things won't completely break because of the aforementioned capabilities register, and in my experimentation, enabling this has no effect, it goes from 1100->1100. I found this while reviewing Jesse's VLV patches. Cc: Jesse Barnes <jbarnes@virtuousgeek.org> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org> [danvet: Bikeshed-away the redudant parens spotted by Chris Wilson.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-03-20 10:19:56 +07:00
(dev_priv->rps.max_delay & 0xff) * 50,
(pcu_mbox & 0xff) * 50);
dev_priv->rps.hw_max = pcu_mbox & 0xff;
}
} else {
DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
}
gen6_set_rps(dev_priv->dev, (gt_perf_status & 0xff00) >> 8);
/* requires MSI enabled */
I915_WRITE(GEN6_PMIER, GEN6_PM_DEFERRED_EVENTS);
spin_lock_irq(&dev_priv->rps.lock);
WARN_ON(dev_priv->rps.pm_iir != 0);
I915_WRITE(GEN6_PMIMR, 0);
spin_unlock_irq(&dev_priv->rps.lock);
/* enable all PM interrupts */
I915_WRITE(GEN6_PMINTRMSK, 0);
rc6vids = 0;
ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
if (IS_GEN6(dev) && ret) {
DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
} else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
rc6vids &= 0xffff00;
rc6vids |= GEN6_ENCODE_RC6_VID(450);
ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
if (ret)
DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
}
gen6_gt_force_wake_put(dev_priv);
}
/*
 * Program the PCU min-frequency table: for every GPU frequency from
 * rps.max_delay down to rps.min_delay, tell the PCU which IA (CPU) reference
 * frequency and/or ring frequency to pair with it.
 *
 * A WARN is raised if rps.hw_lock is not held.
 */
static void gen6_update_ring_freq(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
/* 15 corresponds to the 750MHz mentioned below, given the 50MHz frequency unit. */
int min_freq = 15;
unsigned int gpu_freq;
unsigned int max_ia_freq, min_ring_freq;
int scaling_factor = 180;
WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
max_ia_freq = cpufreq_quick_get_max(0);
/*
* Default to measured freq if none found, PCU will ensure we don't go
* over
*/
if (!max_ia_freq)
max_ia_freq = tsc_khz;
/* Convert from kHz to MHz */
max_ia_freq /= 1000;
min_ring_freq = I915_READ(MCHBAR_MIRROR_BASE_SNB + DCLK);
/* convert DDR frequency from units of 133.3MHz to bandwidth */
min_ring_freq = (2 * 4 * min_ring_freq + 2) / 3;
/*
* For each potential GPU frequency, load a ring frequency we'd like
* to use for memory access. We do this by specifying the IA frequency
* the PCU should use as a reference to determine the ring frequency.
*/
/*
 * NOTE(review): gpu_freq is unsigned, so if rps.min_delay were ever 0 the
 * ">=" test could never fail and the loop would not terminate. Presumably
 * the hardware never reports 0; confirm.
 */
for (gpu_freq = dev_priv->rps.max_delay; gpu_freq >= dev_priv->rps.min_delay;
gpu_freq--) {
int diff = dev_priv->rps.max_delay - gpu_freq;
unsigned int ia_freq = 0, ring_freq = 0;
if (IS_HASWELL(dev)) {
/* 1.25x the GPU frequency, rounded, but never below min_ring_freq. */
ring_freq = (gpu_freq * 5 + 3) / 4;
ring_freq = max(min_ring_freq, ring_freq);
/* leave ia_freq as the default, chosen by cpufreq */
} else {
/* On older processors, there is no separate ring
* clock domain, so in order to boost the bandwidth
* of the ring, we need to upclock the CPU (ia_freq).
*
* For GPU frequencies less than 750MHz,
* just use the lowest ring freq.
*/
if (gpu_freq < min_freq)
ia_freq = 800;
else
ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
/* Convert to the 100MHz units used in the table entry. */
ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
}
/* The return value is ignored: a failed table write is neither reported nor retried. */
sandybridge_pcode_write(dev_priv,
GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
gpu_freq);
}
}
/*
 * Read the maximum GPU frequency field from the IOSF_NC_FB_GFX_FREQ_FUSE
 * register and return it, capped at 0xea.
 */
int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
{
	u32 fuse, max_freq;

	valleyview_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE, &fuse);

	max_freq = (fuse & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;

	/* Clamp to max */
	return min_t(u32, max_freq, 0xea);
}
/*
 * Assemble the RPe frequency from two fuse registers: the low part comes
 * from IOSF_NC_FB_GFX_FMAX_FUSE_LO, the high part from
 * IOSF_NC_FB_GFX_FMAX_FUSE_HI shifted left by 5.
 */
static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
{
	u32 fuse, rpe;

	valleyview_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO, &fuse);
	rpe = (fuse & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;

	valleyview_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI, &fuse);

	return rpe | ((fuse & FB_FMAX_VMIN_FREQ_HI_MASK) << 5);
}
/* Return the low byte of the Punit PUNIT_REG_GPU_LFM register. */
int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
{
	u32 lfm;

	valleyview_punit_read(dev_priv, PUNIT_REG_GPU_LFM, &lfm);

	return lfm & 0xff;
}
/*
 * Delayed-work handler for rps.vlv_work: requests the RPe frequency while
 * holding rps.hw_lock.
 */
static void vlv_rps_timer_work(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;

	dev_priv = container_of(work, drm_i915_private_t, rps.vlv_work.work);

	/*
	 * The timer fired, so we must be idle: drop to the min voltage state.
	 * RPe is used rather than the lowest available frequency because it
	 * should match the Vmin we were shooting for, which should give
	 * better performance when coming back out of RC6.
	 */
	mutex_lock(&dev_priv->rps.hw_lock);
	valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
	mutex_unlock(&dev_priv->rps.hw_lock);
}
/*
 * Enable RPS and RC6 on Valleyview.
 *
 * Programs the RPS/RC6 thresholds, decodes the DDR speed and the current GPU
 * frequency from the Punit status register, reads the max/RPe/min frequency
 * limits into dev_priv->rps, requests the RPe frequency and unmasks the PM
 * interrupts.
 *
 * A WARN is raised if rps.hw_lock is not held. Forcewake is held for the
 * whole register sequence.
 */
static void valleyview_enable_rps(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	u32 gtfifodbg, val, rpe;
	int i;

	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

	/* Report and clear any stale GT FIFO error before starting. */
	if ((gtfifodbg = I915_READ(GTFIFODBG))) {
		DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
		I915_WRITE(GTFIFODBG, gtfifodbg);
	}

	gen6_gt_force_wake_get(dev_priv);

	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
	I915_WRITE(GEN6_RP_UP_EI, 66000);
	I915_WRITE(GEN6_RP_DOWN_EI, 350000);

	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);

	I915_WRITE(GEN6_RP_CONTROL,
		   GEN6_RP_MEDIA_TURBO |
		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
		   GEN6_RP_MEDIA_IS_GFX |
		   GEN6_RP_ENABLE |
		   GEN6_RP_UP_BUSY_AVG |
		   GEN6_RP_DOWN_IDLE_CONT);

	I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
	I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
	I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);

	for_each_ring(ring, dev_priv, i)
		I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);

	I915_WRITE(GEN6_RC6_THRESHOLD, 0xc350);

	/* allows RC6 residency counter to work */
	I915_WRITE(0x138104, _MASKED_BIT_ENABLE(0x3));
	I915_WRITE(GEN6_RC_CONTROL,
		   GEN7_RC_CTL_TO_MODE);

	valleyview_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS, &val);

	/*
	 * Bits 7:6 of the status select the DDR speed. The previous
	 * expression, 800 + (266 * (val >> 6) & 3), applied "& 3" to the
	 * whole product because '*' binds tighter than '&', so the result
	 * was always 800..803 regardless of the field. Decode the 2-bit
	 * field explicitly; the nominal speeds are spelled out because
	 * 800 + 2 * 266 would give 1332 rather than 1333.
	 * NOTE(review): encoding 3 is mapped to 1333 as well; confirm
	 * against the Punit documentation.
	 */
	switch ((val >> 6) & 3) {
	case 0:
		dev_priv->mem_freq = 800;
		break;
	case 1:
		dev_priv->mem_freq = 1066;
		break;
	case 2:
	case 3:
	default:
		dev_priv->mem_freq = 1333;
		break;
	}
	DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);

	DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 0x10 ? "yes" : "no");
	DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);

	/* Bits 15:8 of the status hold the current GPU frequency. */
	DRM_DEBUG_DRIVER("current GPU freq: %d\n",
			 vlv_gpu_freq(dev_priv->mem_freq, (val >> 8) & 0xff));
	dev_priv->rps.cur_delay = (val >> 8) & 0xff;

	dev_priv->rps.max_delay = valleyview_rps_max_freq(dev_priv);
	dev_priv->rps.hw_max = dev_priv->rps.max_delay;
	DRM_DEBUG_DRIVER("max GPU freq: %d\n", vlv_gpu_freq(dev_priv->mem_freq,
						     dev_priv->rps.max_delay));

	rpe = valleyview_rps_rpe_freq(dev_priv);
	DRM_DEBUG_DRIVER("RPe GPU freq: %d\n",
			 vlv_gpu_freq(dev_priv->mem_freq, rpe));
	dev_priv->rps.rpe_delay = rpe;

	val = valleyview_rps_min_freq(dev_priv);
	DRM_DEBUG_DRIVER("min GPU freq: %d\n", vlv_gpu_freq(dev_priv->mem_freq,
							    val));
	dev_priv->rps.min_delay = val;

	DRM_DEBUG_DRIVER("setting GPU freq to %d\n",
			 vlv_gpu_freq(dev_priv->mem_freq, rpe));

	INIT_DELAYED_WORK(&dev_priv->rps.vlv_work, vlv_rps_timer_work);

	valleyview_set_rps(dev_priv->dev, rpe);

	/* requires MSI enabled */
	I915_WRITE(GEN6_PMIER, GEN6_PM_DEFERRED_EVENTS);
	spin_lock_irq(&dev_priv->rps.lock);
	WARN_ON(dev_priv->rps.pm_iir != 0);
	I915_WRITE(GEN6_PMIMR, 0);
	spin_unlock_irq(&dev_priv->rps.lock);
	/* enable all PM interrupts */
	I915_WRITE(GEN6_PMINTRMSK, 0);

	gen6_gt_force_wake_put(dev_priv);
}
void ironlake_teardown_rc6(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
if (dev_priv->ips.renderctx) {
i915_gem_object_unpin(dev_priv->ips.renderctx);
drm_gem_object_unreference(&dev_priv->ips.renderctx->base);
dev_priv->ips.renderctx = NULL;
}
if (dev_priv->ips.pwrctx) {
i915_gem_object_unpin(dev_priv->ips.pwrctx);
drm_gem_object_unreference(&dev_priv->ips.pwrctx->base);
dev_priv->ips.pwrctx = NULL;
}
}
/*
 * Turn RC6 off on Ironlake. Nothing is done when PWRCTXA reads back as zero.
 */
static void ironlake_disable_rc6(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!I915_READ(PWRCTXA))
		return;

	/* Wake the GPU, prevent RC6, then restore RSTDBYCTL */
	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT);
	/* The wait result is not checked; the teardown continues regardless. */
	wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON),
		 50);

	I915_WRITE(PWRCTXA, 0);
	POSTING_READ(PWRCTXA);

	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
	POSTING_READ(RSTDBYCTL);
}
/*
 * Make sure both the render context page and the power context page exist,
 * allocating whichever is missing.
 *
 * Returns 0 on success or -ENOMEM if an allocation fails. A failed power
 * context allocation also tears down the render context again.
 */
static int ironlake_setup_rc6(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!dev_priv->ips.renderctx) {
		dev_priv->ips.renderctx = intel_alloc_context_page(dev);
		if (!dev_priv->ips.renderctx)
			return -ENOMEM;
	}

	if (!dev_priv->ips.pwrctx) {
		dev_priv->ips.pwrctx = intel_alloc_context_page(dev);
		if (!dev_priv->ips.pwrctx) {
			ironlake_teardown_rc6(dev);
			return -ENOMEM;
		}
	}

	return 0;
}
/*
 * Enable RC6 on Ironlake: allocate the context pages, emit an MI_SET_CONTEXT
 * on the render ring pointing at the render context page, wait for the ring
 * to go idle, then hand the power context page to the hardware via PWRCTXA.
 *
 * Returns silently when intel_enable_rc6() reports RC6 as disabled or when
 * any step fails; failures after allocation tear the context pages down.
 * A WARN is raised if dev->struct_mutex is not held.
 */
static void ironlake_enable_rc6(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
bool was_interruptible;
int ret;
/* rc6 disabled by default due to repeated reports of hanging during
* boot and resume.
*/
if (!intel_enable_rc6(dev))
return;
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
ret = ironlake_setup_rc6(dev);
if (ret)
return;
/* Force mm.interruptible off for the ring operations below; restored on every exit path. */
was_interruptible = dev_priv->mm.interruptible;
dev_priv->mm.interruptible = false;
/*
* GPU can automatically power down the render unit if given a page
* to save state.
*/
/* Six dwords are reserved, matching the six intel_ring_emit() calls below. */
ret = intel_ring_begin(ring, 6);
if (ret) {
ironlake_teardown_rc6(dev);
dev_priv->mm.interruptible = was_interruptible;
return;
}
intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
intel_ring_emit(ring, MI_SET_CONTEXT);
intel_ring_emit(ring, dev_priv->ips.renderctx->gtt_offset |
MI_MM_SPACE_GTT |
MI_SAVE_EXT_STATE_EN |
MI_RESTORE_EXT_STATE_EN |
MI_RESTORE_INHIBIT);
intel_ring_emit(ring, MI_SUSPEND_FLUSH);
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_FLUSH);
intel_ring_advance(ring);
/*
* Wait for the command parser to advance past MI_SET_CONTEXT. The HW
* does an implicit flush, combined with MI_FLUSH above, it should be
* safe to assume that renderctx is valid
*/
ret = intel_ring_idle(ring);
dev_priv->mm.interruptible = was_interruptible;
if (ret) {
DRM_ERROR("failed to enable ironlake power savings\n");
ironlake_teardown_rc6(dev);
return;
}
/* Point the hardware at the power context page, then clear RCX_SW_EXIT. */
I915_WRITE(PWRCTXA, dev_priv->ips.pwrctx->gtt_offset | PWRCTX_EN);
I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
}
/*
 * Decode a PXVFREQ-style register value into a frequency.
 *
 * The value carries three fields: a divider in bits 21:16, a post-divider
 * exponent in bits 13:12 and a pre-divider in bits 2:0. The result is
 * div * 133333 / (2^post * pre), using integer division. A zero pre-divider
 * yields 0 instead of dividing by zero.
 */
static unsigned long intel_pxfreq(u32 vidfreq)
{
	const int div = (vidfreq >> 16) & 0x3f;
	const int post = (vidfreq >> 12) & 0x3;
	const int pre = vidfreq & 0x7;

	if (pre == 0)
		return 0;

	return (div * 133333) / ((1 << post) * pre);
}
/*
 * Coefficient table used by __i915_chipset_val(). A row is selected when its
 * i field equals ips.c_m and its t field equals ips.r_t; the row's m and c
 * are then used as slope and offset in "m * diff + c".
 */
static const struct cparams {
u16 i; /* compared against dev_priv->ips.c_m */
u16 t; /* compared against dev_priv->ips.r_t */
u16 m; /* multiplier applied to the counter rate */
u16 c; /* constant added after the multiplication */
} cparams[] = {
{ 1, 1333, 301, 28664 },
{ 1, 1066, 294, 24460 },
{ 1, 800, 294, 25192 },
{ 0, 1333, 276, 27605 },
{ 0, 1066, 276, 27605 },
{ 0, 800, 231, 23784 },
};
/*
 * Compute the chipset power estimate from the DMIEC/DDREC/CSIEC counters.
 *
 * The counter delta since the previous call is divided by the elapsed
 * milliseconds, scaled with the matching cparams row and divided by 10. The
 * result is cached in ips.chipset_power and returned. Calls less than 11ms
 * after the previous sample return the cached value.
 *
 * Must be called with mchdev_lock held (asserted).
 */
static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
{
u64 total_count, diff, ret;
u32 count1, count2, count3, m = 0, c = 0;
unsigned long now = jiffies_to_msecs(jiffies), diff1;
int i;
assert_spin_locked(&mchdev_lock);
diff1 = now - dev_priv->ips.last_time1;
/* Prevent division-by-zero if we are asking too fast.
* Also, we don't get interesting results if we are polling
* faster than once in 10ms, so just return the saved value
* in such cases.
*/
if (diff1 <= 10)
return dev_priv->ips.chipset_power;
count1 = I915_READ(DMIEC);
count2 = I915_READ(DDREC);
count3 = I915_READ(CSIEC);
/* NOTE(review): the three u32 values are summed in 32 bits before widening to u64. */
total_count = count1 + count2 + count3;
/* FIXME: handle per-counter overflow */
/*
 * NOTE(review): the wrap case uses ~0UL, whose width depends on the
 * architecture, while the sum above is at most 32 bits; confirm the
 * intended wrap width.
 */
if (total_count < dev_priv->ips.last_count1) {
diff = ~0UL - dev_priv->ips.last_count1;
diff += total_count;
} else {
diff = total_count - dev_priv->ips.last_count1;
}
/* Pick the coefficients for this c_m/r_t pair; m and c stay 0 if no row matches. */
for (i = 0; i < ARRAY_SIZE(cparams); i++) {
if (cparams[i].i == dev_priv->ips.c_m &&
cparams[i].t == dev_priv->ips.r_t) {
m = cparams[i].m;
c = cparams[i].c;
break;
}
}
/* Counts per millisecond, then the linear fit, then scale down by 10. */
diff = div_u64(diff, diff1);
ret = ((m * diff) + c);
ret = div_u64(ret, 10);
/* Remember this sample as the baseline for the next call. */
dev_priv->ips.last_count1 = total_count;
dev_priv->ips.last_time1 = now;
dev_priv->ips.chipset_power = ret;
return ret;
}
/*
 * Locked wrapper around __i915_chipset_val().
 *
 * Returns 0 on anything but gen5; otherwise takes mchdev_lock and returns the
 * chipset power estimate.
 */
unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
{
	unsigned long power = 0;

	if (dev_priv->info->gen == 5) {
		spin_lock_irq(&mchdev_lock);
		power = __i915_chipset_val(dev_priv);
		spin_unlock_irq(&mchdev_lock);
	}

	return power;
}
/*
 * Compute (slope * reading) / 127 - offset, where slope and offset are
 * fields of the TSFS register and reading is the 8-bit TR1 value.
 */
unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
{
	u32 tsfs = I915_READ(TSFS);
	unsigned long slope = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;
	unsigned long reading = I915_READ8(TR1);
	unsigned long offset = tsfs & TSFS_INTR_MASK;

	return ((slope * reading) / 127) - offset;
}
static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
{
static const struct v_table {
u16 vd; /* in .1 mil */
u16 vm; /* in .1 mil */
} v_table[] = {
{ 0, 0, },
{ 375, 0, },
{ 500, 0, },
{ 625, 0, },
{ 750, 0, },
{ 875, 0, },
{ 1000, 0, },
{ 1125, 0, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4125, 3000, },
{ 4250, 3125, },
{ 4375, 3250, },
{ 4500, 3375, },
{ 4625, 3500, },
{ 4750, 3625, },
{ 4875, 3750, },
{ 5000, 3875, },
{ 5125, 4000, },
{ 5250, 4125, },
{ 5375, 4250, },
{ 5500, 4375, },
{ 5625, 4500, },
{ 5750, 4625, },
{ 5875, 4750, },
{ 6000, 4875, },
{ 6125, 5000, },
{ 6250, 5125, },
{ 6375, 5250, },
{ 6500, 5375, },
{ 6625, 5500, },
{ 6750, 5625, },
{ 6875, 5750, },
{ 7000, 5875, },
{ 7125, 6000, },
{ 7250, 6125, },
{ 7375, 6250, },
{ 7500, 6375, },
{ 7625, 6500, },
{ 7750, 6625, },
{ 7875, 6750, },
{ 8000, 6875, },
{ 8125, 7000, },
{ 8250, 7125, },
{ 8375, 7250, },
{ 8500, 7375, },
{ 8625, 7500, },
{ 8750, 7625, },
{ 8875, 7750, },
{ 9000, 7875, },
{ 9125, 8000, },
{ 9250, 8125, },
{ 9375, 8250, },
{ 9500, 8375, },
{ 9625, 8500, },
{ 9750, 8625, },
{ 9875, 8750, },
{ 10000, 8875, },
{ 10125, 9000, },
{ 10250, 9125, },
{ 10375, 9250, },
{ 10500, 9375, },
{ 10625, 9500, },
{ 10750, 9625, },
{ 10875, 9750, },
{ 11000, 9875, },
{ 11125, 10000, },
{ 11250, 10125, },
{ 11375, 10250, },
{ 11500, 10375, },
{ 11625, 10500, },
{ 11750, 10625, },
{ 11875, 10750, },
{ 12000, 10875, },
{ 12125, 11000, },
{ 12250, 11125, },
{ 12375, 11250, },
{ 12500, 11375, },
{ 12625, 11500, },
{ 12750, 11625, },
{ 12875, 11750, },
{ 13000, 11875, },
{ 13125, 12000, },
{ 13250, 12125, },
{ 13375, 12250, },
{ 13500, 12375, },
{ 13625, 12500, },
{ 13750, 12625, },
{ 13875, 12750, },
{ 14000, 12875, },
{ 14125, 13000, },
{ 14250, 13125, },
{ 14375, 13250, },
{ 14500, 13375, },
{ 14625, 13500, },
{ 14750, 13625, },
{ 14875, 13750, },
{ 15000, 13875, },
{ 15125, 14000, },
{ 15250, 14125, },
{ 15375, 14250, },
{ 15500, 14375, },
{ 15625, 14500, },
{ 15750, 14625, },
{ 15875, 14750, },
{ 16000, 14875, },
{ 16125, 15000, },
};
if (dev_priv->info->is_mobile)
return v_table[pxvid].vm;
else
return v_table[pxvid].vd;
}
static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
{
struct timespec now, diff1;
u64 diff;
unsigned long diffms;
u32 count;
assert_spin_locked(&mchdev_lock);
getrawmonotonic(&now);
diff1 = timespec_sub(now, dev_priv->ips.last_time2);
/* Don't divide by 0 */
diffms = diff1.tv_sec * 1000 + diff1.tv_nsec / 1000000;
if (!diffms)
return;
count = I915_READ(GFXEC);
if (count < dev_priv->ips.last_count2) {
diff = ~0UL - dev_priv->ips.last_count2;
diff += count;
} else {
diff = count - dev_priv->ips.last_count2;
}
dev_priv->ips.last_count2 = count;
dev_priv->ips.last_time2 = now;
/* More magic constants... */
diff = diff * 1181;
diff = div_u64(diff, diffms * 10);
dev_priv->ips.gfx_power = diff;
}
/*
 * i915_update_gfx_val - locked wrapper around __i915_update_gfx_val()
 * @dev_priv: i915 device private
 *
 * Only acts on gen5 devices; for any other generation this is a no-op.
 */
void i915_update_gfx_val(struct drm_i915_private *dev_priv)
{
	if (dev_priv->info->gen == 5) {
		spin_lock_irq(&mchdev_lock);
		__i915_update_gfx_val(dev_priv);
		spin_unlock_irq(&mchdev_lock);
	}
}
/*
 * __i915_gfx_val - compute the graphics value reported to IPS
 * @dev_priv: i915 device private
 *
 * Looks up the voltage for the current P-state, derives a correction term
 * from i915_mch_val() using empirically derived constants, refreshes the
 * cached gfx_power and returns gfx_power plus the correction term.
 *
 * The caller must hold mchdev_lock.
 */
static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
{
	unsigned long temp, slope, offset, factor, volt, scaled, extra;
	u32 reg, vid, ext_v;

	assert_spin_locked(&mchdev_lock);

	/* Bits 30:24 of the current P-state's PXVFREQ entry select the VID. */
	reg = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_delay * 4));
	vid = (reg >> 24) & 0x7f;
	ext_v = pvid_to_extvid(dev_priv, vid);
	volt = ext_v;

	temp = i915_mch_val(dev_priv);

	/* Revel in the empirically derived constants */

	/* Correction factor in 1/100000 units */
	if (temp > 80) {
		slope = 2349;
		offset = 135940;
	} else if (temp >= 50) {
		slope = 964;
		offset = 29317;
	} else { /* < 50 */
		slope = 301;
		offset = 1004;
	}
	factor = (temp * slope) + offset;

	factor = (factor * ((150142 * volt) / 10000 - 78642)) / 100000;
	scaled = factor * dev_priv->ips.corr;

	/* The final division by 100 converts to mW */
	extra = ((scaled * volt) / 10000) / 100;

	__i915_update_gfx_val(dev_priv);

	return dev_priv->ips.gfx_power + extra;
}
/*
 * i915_gfx_val - locked wrapper around __i915_gfx_val()
 * @dev_priv: i915 device private
 *
 * Returns 0 for anything other than a gen5 device.
 */
unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
{
	unsigned long power = 0;

	if (dev_priv->info->gen == 5) {
		spin_lock_irq(&mchdev_lock);
		power = __i915_gfx_val(dev_priv);
		spin_unlock_irq(&mchdev_lock);
	}

	return power;
}
/**
 * i915_read_mch_val - return value for IPS use
 *
 * Calculate and return a value for the IPS driver to use when deciding whether
 * we have thermal and power headroom to increase CPU or GPU power budget.
 *
 * The result is the sum of the chipset and graphics values; 0 is returned
 * when no i915 device is currently registered.
 */
unsigned long i915_read_mch_val(void)
{
	struct drm_i915_private *i915;
	unsigned long total = 0;

	spin_lock_irq(&mchdev_lock);

	i915 = i915_mch_dev;
	if (i915) {
		unsigned long chipset = __i915_chipset_val(i915);
		unsigned long graphics = __i915_gfx_val(i915);

		total = chipset + graphics;
	}

	spin_unlock_irq(&mchdev_lock);

	return total;
}
EXPORT_SYMBOL_GPL(i915_read_mch_val);
/**
* i915_gpu_raise - raise GPU frequency limit
*
* Raise the limit; IPS indicates we have thermal headroom.
*/
bool i915_gpu_raise(void)
{
struct drm_i915_private *dev_priv;
bool ret = true;
spin_lock_irq(&mchdev_lock);
if (!i915_mch_dev) {
ret = false;
goto out_unlock;
}
dev_priv = i915_mch_dev;
if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
dev_priv->ips.max_delay--;
out_unlock:
spin_unlock_irq(&mchdev_lock);
return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_raise);
/**
* i915_gpu_lower - lower GPU frequency limit
*
* IPS indicates we're close to a thermal limit, so throttle back the GPU
* frequency maximum.
*/
bool i915_gpu_lower(void)
{
struct drm_i915_private *dev_priv;
bool ret = true;
spin_lock_irq(&mchdev_lock);
if (!i915_mch_dev) {
ret = false;
goto out_unlock;
}
dev_priv = i915_mch_dev;
if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
dev_priv->ips.max_delay++;
out_unlock:
spin_unlock_irq(&mchdev_lock);
return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_lower);
/**
* i915_gpu_busy - indicate GPU business to IPS
*
* Tell the IPS driver whether or not the GPU is busy.
*/
bool i915_gpu_busy(void)
{
struct drm_i915_private *dev_priv;
struct intel_ring_buffer *ring;
bool ret = false;
int i;
spin_lock_irq(&mchdev_lock);
if (!i915_mch_dev)
goto out_unlock;
dev_priv = i915_mch_dev;
for_each_ring(ring, dev_priv, i)
ret |= !list_empty(&ring->request_list);
out_unlock:
spin_unlock_irq(&mchdev_lock);
return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_busy);
/**
 * i915_gpu_turbo_disable - disable graphics turbo
 *
 * Disable graphics turbo by resetting the max frequency and setting the
 * current frequency to the default.
 *
 * Returns false when no i915 device is registered or when
 * ironlake_set_drps() reports failure, true otherwise.
 */
bool i915_gpu_turbo_disable(void)
{
	struct drm_i915_private *i915;
	bool ok = false;

	spin_lock_irq(&mchdev_lock);

	i915 = i915_mch_dev;
	if (i915) {
		i915->ips.max_delay = i915->ips.fstart;

		if (ironlake_set_drps(i915->dev, i915->ips.fstart))
			ok = true;
	}

	spin_unlock_irq(&mchdev_lock);

	return ok;
}
EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
/**
 * Tells the intel_ips driver that the i915 driver is now loaded, if
 * IPS got loaded first.
 *
 * This awkward dance is so that neither module has to depend on the
 * other in order for IPS to do the appropriate communication of
 * GPU turbo limits to i915.
 *
 * The IPS entry point is looked up by symbol at run time; if it cannot be
 * found, nothing is called.
 */
static void
ips_ping_for_i915_load(void)
{
	void (*notify)(void);

	notify = symbol_get(ips_link_to_i915_driver);
	if (!notify)
		return;

	notify();
	symbol_put(ips_link_to_i915_driver);
}
/*
 * intel_gpu_ips_init - publish this device to the IPS interface
 * @dev_priv: i915 device private to expose through i915_mch_dev
 *
 * Stores @dev_priv in i915_mch_dev under mchdev_lock and then pings
 * intel-ips so it can link up if it was loaded first.
 */
void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
{
/* We only register the i915 ips part with intel-ips once everything is
* set up, to avoid intel-ips sneaking in and reading bogus values. */
spin_lock_irq(&mchdev_lock);
i915_mch_dev = dev_priv;
spin_unlock_irq(&mchdev_lock);
/* The ping is issued after mchdev_lock has been dropped. */
ips_ping_for_i915_load();
}
/*
 * intel_gpu_ips_teardown - withdraw the device from the IPS interface
 *
 * Clears i915_mch_dev under mchdev_lock; the exported i915_* IPS hooks in
 * this file check that pointer and bail out when it is NULL.
 */
void intel_gpu_ips_teardown(void)
{
spin_lock_irq(&mchdev_lock);
i915_mch_dev = NULL;
spin_unlock_irq(&mchdev_lock);
}
static void intel_init_emon(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
u32 lcfuse;
u8 pxw[16];
int i;
/* Disable to program */
I915_WRITE(ECR, 0);
POSTING_READ(ECR);
/* Program energy weights for various events */
I915_WRITE(SDEW, 0x15040d00);
I915_WRITE(CSIEW0, 0x007f0000);
I915_WRITE(CSIEW1, 0x1e220004);
I915_WRITE(CSIEW2, 0x04000004);
for (i = 0; i < 5; i++)
I915_WRITE(PEW + (i * 4), 0);
for (i = 0; i < 3; i++)
I915_WRITE(DEW + (i * 4), 0);
/* Program P-state weights to account for frequency power adjustment */
for (i = 0; i < 16; i++) {
u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4));
unsigned long freq = intel_pxfreq(pxvidfreq);
unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
PXVFREQ_PX_SHIFT;
unsigned long val;
val = vid * vid;
val *= (freq / 1000);
val *= 255;
val /= (127*127*900);
if (val > 0xff)
DRM_ERROR("bad pxval: %ld\n", val);
pxw[i] = val;
}
/* Render standby states get 0 weight */
pxw[14] = 0;
pxw[15] = 0;
for (i = 0; i < 4; i++) {
u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
I915_WRITE(PXW + (i * 4), val);
}
/* Adjust magic regs to magic values (more experimental results) */
I915_WRITE(OGW0, 0);
I915_WRITE(OGW1, 0);
I915_WRITE(EG0, 0x00007f00);
I915_WRITE(EG1, 0x0000000e);
I915_WRITE(EG2, 0x000e0000);
I915_WRITE(EG3, 0x68000300);
I915_WRITE(EG4, 0x42000000);
I915_WRITE(EG5, 0x00140031);
I915_WRITE(EG6, 0);
I915_WRITE(EG7, 0);
for (i = 0; i < 8; i++)
I915_WRITE(PXWL + (i * 4), 0);
/* Enable PMON + select events */
I915_WRITE(ECR, 0x80000019);
lcfuse = I915_READ(LCFUSE02);
dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
}
/*
 * intel_disable_gt_powersave - turn off GT power saving features
 * @dev: drm device
 *
 * Ironlake mobile disables DRPS and RC6.  Gen6+ first cancels the pending
 * RPS work items and then disables RPS under rps.hw_lock.  Other devices
 * are left untouched.
 */
void intel_disable_gt_powersave(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* Interrupts should be disabled already to avoid re-arming. */
	WARN_ON(dev->irq_enabled);

	if (IS_IRONLAKE_M(dev)) {
		ironlake_disable_drps(dev);
		ironlake_disable_rc6(dev);
		return;
	}

	if (INTEL_INFO(dev)->gen < 6)
		return;

	/* Flush out all deferred work before touching the hardware. */
	cancel_delayed_work_sync(&dev_priv->rps.delayed_resume_work);
	cancel_work_sync(&dev_priv->rps.work);
	if (IS_VALLEYVIEW(dev))
		cancel_delayed_work_sync(&dev_priv->rps.vlv_work);

	mutex_lock(&dev_priv->rps.hw_lock);
	if (!IS_VALLEYVIEW(dev))
		gen6_disable_rps(dev);
	else
		valleyview_disable_rps(dev);
	mutex_unlock(&dev_priv->rps.hw_lock);
}
/*
 * intel_gen6_powersave_work - deferred RPS enabling
 * @work: the work member embedded in rps.delayed_resume_work
 *
 * Runs from the delayed work scheduled by intel_enable_gt_powersave() and
 * enables RPS under rps.hw_lock.
 */
static void intel_gen6_powersave_work(struct work_struct *work)
{
	struct drm_i915_private *dev_priv;
	struct drm_device *dev;

	dev_priv = container_of(work, struct drm_i915_private,
				rps.delayed_resume_work.work);
	dev = dev_priv->dev;

	mutex_lock(&dev_priv->rps.hw_lock);

	if (!IS_VALLEYVIEW(dev)) {
		gen6_enable_rps(dev);
		gen6_update_ring_freq(dev);
	} else {
		valleyview_enable_rps(dev);
	}

	mutex_unlock(&dev_priv->rps.hw_lock);
}
/*
 * intel_enable_gt_powersave - turn on GT power saving features
 * @dev: drm device
 *
 * Ironlake mobile is set up synchronously; gen6 and gen7 defer the work to
 * rps.delayed_resume_work.  Other devices are left untouched.
 */
void intel_enable_gt_powersave(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (IS_IRONLAKE_M(dev)) {
		ironlake_enable_drps(dev);
		ironlake_enable_rc6(dev);
		intel_init_emon(dev);
		return;
	}

	if (!(IS_GEN6(dev) || IS_GEN7(dev)))
		return;

	/*
	 * PCU communication is slow and this doesn't need to be
	 * done at any specific time, so do this out of our fast path
	 * to make resume and init faster.
	 */
	schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
			      round_jiffies_up_relative(HZ));
}
/*
 * ibx_init_clock_gating - PCH clock gating setup used by the Ironlake path
 * @dev: drm device
 *
 * Performs a single write of PCH_DPLSUNIT_CLOCK_GATE_DISABLE to
 * SOUTH_DSPCLK_GATE_D.
 */
static void ibx_init_clock_gating(struct drm_device *dev)
{
/* dev_priv is referenced implicitly by the I915_WRITE() macro. */
struct drm_i915_private *dev_priv = dev->dev_private;
/*
* On Ibex Peak and Cougar Point, we need to disable clock
* gating for the panel power sequencer or it will fail to
* start up when no ports are active.
*/
I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
}
static void ironlake_init_clock_gating(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
drm/i915: Consolidate ILK_DSPCLK_GATE and PCH_DSPCLK_GATE Register 0x42020 was defined twice under the names PCH_DSPCLK_GATE_D and ILK_DSPCLK_GATE. This patch consolidate the 2 sets of defines in one. The transforms done are: PCH_DSPCLK_GATE_D -> ILK_DSPCLK_GATE_D ILK_DSPCLK_GATE -> ILK_DSPCLK_GATE_D DPARBUNIT_CLOCK_GATE_DISABLE -> ILK_DPARBUNIT_CLOCK_GATE_DISABLE ILK_DPARB_CLK_GATE -> ILK_DPARBUNIT_CLOCK_GATE_DISABLE DPFDUNIT_CLOCK_GATE_DISABLE -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE ILK_DPFD_CLK_GATE -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE ILK_CLK_FBC -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE DPFCRUNIT_CLOCK_GATE_DISABLE -> ILK_DPFCRUNIT_CLOCK_GATE_DISABLE ILK_DPFC_DIS1 -> ILK_DPFCRUNIT_CLOCK_GATE_DISABLE DPFCUNIT_CLOCK_GATE_DISABLE -> ILK_DPFCUNIT_CLOCK_GATE_DISABLE ILK_DPFC_DIS2 -> ILK_DPFCUNIT_CLOCK_GATE_DISABLE We have a VHRUNIT_CLOCK_GATE_DISABLE define for the pre-ILK DSPCLK_GATE_D. Even if the same bit is used in ILK_DSPCLK_GATE_D, other bits in the register change, so I went with re-defining it, well more precisely rename IVB_VRHUNIT_CLK_GATE, which is not specific to IVB+. So: IVB_VRHUNIT_CLK_GATE -> ILK_VHRUNIT_CLOCK_GATE_DISABLE VHRUNIT_CLOCK_GATE_DISABLE -> ILK_VHRUNIT_CLOCK_GATE_DISABLE (ILK+ code) This commit is only a renaming commit, further commits will clean up the logic. v2: Rename bit 5 and 7 to _ENABLE as setting them to 1 enables clock gating on their respective units, contrary to all of the other bits (Paulo Zanoni) Signed-off-by: Damien Lespiau <damien.lespiau@intel.com> Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-10-19 23:55:41 +07:00
uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
/* Required for FBC */
dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
I915_WRITE(PCH_3DCGDIS0,
MARIUNIT_CLOCK_GATE_DISABLE |
SVSMUNIT_CLOCK_GATE_DISABLE);
I915_WRITE(PCH_3DCGDIS1,
VFMUNIT_CLOCK_GATE_DISABLE);
/*
* According to the spec the following bits should be set in
* order to enable memory self-refresh
* The bit 22/21 of 0x42004
* The bit 5 of 0x42020
* The bit 15 of 0x45000
*/
I915_WRITE(ILK_DISPLAY_CHICKEN2,
(I915_READ(ILK_DISPLAY_CHICKEN2) |
ILK_DPARB_GATE | ILK_VSDPFD_FULL));
dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
I915_WRITE(DISP_ARB_CTL,
(I915_READ(DISP_ARB_CTL) |
DISP_FBC_WM_DIS));
I915_WRITE(WM3_LP_ILK, 0);
I915_WRITE(WM2_LP_ILK, 0);
I915_WRITE(WM1_LP_ILK, 0);
/*
* Based on the document from hardware guys the following bits
* should be set unconditionally in order to enable FBC.
* The bit 22 of 0x42000
* The bit 22 of 0x42004
* The bit 7,8,9 of 0x42020.
*/
if (IS_IRONLAKE_M(dev)) {
I915_WRITE(ILK_DISPLAY_CHICKEN1,
I915_READ(ILK_DISPLAY_CHICKEN1) |
ILK_FBCQ_DIS);
I915_WRITE(ILK_DISPLAY_CHICKEN2,
I915_READ(ILK_DISPLAY_CHICKEN2) |
ILK_DPARB_GATE);
}
I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
I915_WRITE(ILK_DISPLAY_CHICKEN2,
I915_READ(ILK_DISPLAY_CHICKEN2) |
ILK_ELPIN_409_SELECT);
I915_WRITE(_3D_CHICKEN2,
_3D_CHICKEN2_WM_READ_PIPELINED << 16 |
_3D_CHICKEN2_WM_READ_PIPELINED);
/* WaDisableRenderCachePipelinedFlush */
I915_WRITE(CACHE_MODE_0,
_MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
ibx_init_clock_gating(dev);
}
/*
 * cpt_init_clock_gating - PCH clock gating setup used by the gen6/IVB paths
 * @dev: drm device
 *
 * Programs SOUTH_DSPCLK_GATE_D and SOUTH_CHICKEN2, then the per-pipe
 * TRANS_CHICKEN2 and TRANS_CHICKEN1 registers.
 */
static void cpt_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t chicken;
	int pipe;

	/*
	 * On Ibex Peak and Cougar Point, we need to disable clock
	 * gating for the panel power sequencer or it will fail to
	 * start up when no ports are active.
	 */
	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);

	chicken = I915_READ(SOUTH_CHICKEN2);
	I915_WRITE(SOUTH_CHICKEN2, chicken | DPLS_EDP_PPS_FIX_DIS);

	/* The below fixes the weird display corruption, a few pixels shifted
	 * downward, on (only) LVDS of some HP laptops with IVY.
	 */
	for_each_pipe(pipe) {
		chicken = I915_READ(TRANS_CHICKEN2(pipe));

		chicken |= TRANS_CHICKEN2_TIMING_OVERRIDE;

		/* FDI polarity follows fdi_rx_polarity_inverted */
		chicken &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
		if (dev_priv->fdi_rx_polarity_inverted)
			chicken |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;

		chicken &= ~(TRANS_CHICKEN2_FRAME_START_DELAY_MASK |
			     TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER |
			     TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH);

		I915_WRITE(TRANS_CHICKEN2(pipe), chicken);
	}

	/* WADP0ClockGatingDisable */
	for_each_pipe(pipe) {
		I915_WRITE(TRANS_CHICKEN1(pipe),
			   TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
	}
}
/*
 * gen6_check_mch_setup - sanity check the MCH_SSKPD WM0 field
 * @dev: drm device
 *
 * Logs a hint to upgrade the BIOS when the WM0 field of MCH_SSKPD does not
 * hold the expected value.  Nothing is written to the hardware.
 */
static void gen6_check_mch_setup(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t sskpd = I915_READ(MCH_SSKPD);

	if ((sskpd & MCH_SSKPD_WM0_MASK) == MCH_SSKPD_WM0_VAL)
		return;

	DRM_INFO("Wrong MCH_SSKPD value: 0x%08x\n", sskpd);
	DRM_INFO("This can cause pipe underruns and display issues.\n");
	DRM_INFO("Please upgrade your BIOS to fix this.\n");
}
static void gen6_init_clock_gating(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
int pipe;
drm/i915: Consolidate ILK_DSPCLK_GATE and PCH_DSPCLK_GATE Register 0x42020 was defined twice under the names PCH_DSPCLK_GATE_D and ILK_DSPCLK_GATE. This patch consolidate the 2 sets of defines in one. The transforms done are: PCH_DSPCLK_GATE_D -> ILK_DSPCLK_GATE_D ILK_DSPCLK_GATE -> ILK_DSPCLK_GATE_D DPARBUNIT_CLOCK_GATE_DISABLE -> ILK_DPARBUNIT_CLOCK_GATE_DISABLE ILK_DPARB_CLK_GATE -> ILK_DPARBUNIT_CLOCK_GATE_DISABLE DPFDUNIT_CLOCK_GATE_DISABLE -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE ILK_DPFD_CLK_GATE -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE ILK_CLK_FBC -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE DPFCRUNIT_CLOCK_GATE_DISABLE -> ILK_DPFCRUNIT_CLOCK_GATE_DISABLE ILK_DPFC_DIS1 -> ILK_DPFCRUNIT_CLOCK_GATE_DISABLE DPFCUNIT_CLOCK_GATE_DISABLE -> ILK_DPFCUNIT_CLOCK_GATE_DISABLE ILK_DPFC_DIS2 -> ILK_DPFCUNIT_CLOCK_GATE_DISABLE We have a VHRUNIT_CLOCK_GATE_DISABLE define for the pre-ILK DSPCLK_GATE_D. Even if the same bit is used in ILK_DSPCLK_GATE_D, other bits in the register change, so I went with re-defining it, well more precisely rename IVB_VRHUNIT_CLK_GATE, which is not specific to IVB+. So: IVB_VRHUNIT_CLK_GATE -> ILK_VHRUNIT_CLOCK_GATE_DISABLE VHRUNIT_CLOCK_GATE_DISABLE -> ILK_VHRUNIT_CLOCK_GATE_DISABLE (ILK+ code) This commit is only a renaming commit, further commits will clean up the logic. v2: Rename bit 5 and 7 to _ENABLE as setting them to 1 enables clock gating on their respective units, contrary to all of the other bits (Paulo Zanoni) Signed-off-by: Damien Lespiau <damien.lespiau@intel.com> Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-10-19 23:55:41 +07:00
uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
drm/i915: Consolidate ILK_DSPCLK_GATE and PCH_DSPCLK_GATE Register 0x42020 was defined twice under the names PCH_DSPCLK_GATE_D and ILK_DSPCLK_GATE. This patch consolidate the 2 sets of defines in one. The transforms done are: PCH_DSPCLK_GATE_D -> ILK_DSPCLK_GATE_D ILK_DSPCLK_GATE -> ILK_DSPCLK_GATE_D DPARBUNIT_CLOCK_GATE_DISABLE -> ILK_DPARBUNIT_CLOCK_GATE_DISABLE ILK_DPARB_CLK_GATE -> ILK_DPARBUNIT_CLOCK_GATE_DISABLE DPFDUNIT_CLOCK_GATE_DISABLE -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE ILK_DPFD_CLK_GATE -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE ILK_CLK_FBC -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE DPFCRUNIT_CLOCK_GATE_DISABLE -> ILK_DPFCRUNIT_CLOCK_GATE_DISABLE ILK_DPFC_DIS1 -> ILK_DPFCRUNIT_CLOCK_GATE_DISABLE DPFCUNIT_CLOCK_GATE_DISABLE -> ILK_DPFCUNIT_CLOCK_GATE_DISABLE ILK_DPFC_DIS2 -> ILK_DPFCUNIT_CLOCK_GATE_DISABLE We have a VHRUNIT_CLOCK_GATE_DISABLE define for the pre-ILK DSPCLK_GATE_D. Even if the same bit is used in ILK_DSPCLK_GATE_D, other bits in the register change, so I went with re-defining it, well more precisely rename IVB_VRHUNIT_CLK_GATE, which is not specific to IVB+. So: IVB_VRHUNIT_CLK_GATE -> ILK_VHRUNIT_CLOCK_GATE_DISABLE VHRUNIT_CLOCK_GATE_DISABLE -> ILK_VHRUNIT_CLOCK_GATE_DISABLE (ILK+ code) This commit is only a renaming commit, further commits will clean up the logic. v2: Rename bit 5 and 7 to _ENABLE as setting them to 1 enables clock gating on their respective units, contrary to all of the other bits (Paulo Zanoni) Signed-off-by: Damien Lespiau <damien.lespiau@intel.com> Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-10-19 23:55:41 +07:00
I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
I915_WRITE(ILK_DISPLAY_CHICKEN2,
I915_READ(ILK_DISPLAY_CHICKEN2) |
ILK_ELPIN_409_SELECT);
/* WaDisableHiZPlanesWhenMSAAEnabled */
I915_WRITE(_3D_CHICKEN,
_MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
/* WaSetupGtModeTdRowDispatch */
if (IS_SNB_GT1(dev))
I915_WRITE(GEN6_GT_MODE,
_MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE));
I915_WRITE(WM3_LP_ILK, 0);
I915_WRITE(WM2_LP_ILK, 0);
I915_WRITE(WM1_LP_ILK, 0);
I915_WRITE(CACHE_MODE_0,
_MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
I915_WRITE(GEN6_UCGCTL1,
I915_READ(GEN6_UCGCTL1) |
GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
GEN6_CSUNIT_CLOCK_GATE_DISABLE);
/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
* gating disable must be set. Failure to set it results in
* flickering pixels due to Z write ordering failures after
* some amount of runtime in the Mesa "fire" demo, and Unigine
* Sanctuary and Tropics, and apparently anything else with
* alpha test or pixel discard.
*
* According to the spec, bit 11 (RCCUNIT) must also be set,
* but we didn't debug actual testcases to find it out.
*
* Also apply WaDisableVDSUnitClockGating and
* WaDisableRCPBUnitClockGating.
*/
I915_WRITE(GEN6_UCGCTL2,
GEN7_VDSUNIT_CLOCK_GATE_DISABLE |
GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
/* Bspec says we need to always set all mask bits. */
I915_WRITE(_3D_CHICKEN3, (0xFFFF << 16) |
_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL);
/*
* According to the spec the following bits should be
* set in order to enable memory self-refresh and fbc:
* The bit21 and bit22 of 0x42000
* The bit21 and bit22 of 0x42004
* The bit5 and bit7 of 0x42020
* The bit14 of 0x70180
* The bit14 of 0x71180
*/
I915_WRITE(ILK_DISPLAY_CHICKEN1,
I915_READ(ILK_DISPLAY_CHICKEN1) |
ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
I915_WRITE(ILK_DISPLAY_CHICKEN2,
I915_READ(ILK_DISPLAY_CHICKEN2) |
ILK_DPARB_GATE | ILK_VSDPFD_FULL);
drm/i915: Consolidate ILK_DSPCLK_GATE and PCH_DSPCLK_GATE Register 0x42020 was defined twice under the names PCH_DSPCLK_GATE_D and ILK_DSPCLK_GATE. This patch consolidate the 2 sets of defines in one. The transforms done are: PCH_DSPCLK_GATE_D -> ILK_DSPCLK_GATE_D ILK_DSPCLK_GATE -> ILK_DSPCLK_GATE_D DPARBUNIT_CLOCK_GATE_DISABLE -> ILK_DPARBUNIT_CLOCK_GATE_DISABLE ILK_DPARB_CLK_GATE -> ILK_DPARBUNIT_CLOCK_GATE_DISABLE DPFDUNIT_CLOCK_GATE_DISABLE -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE ILK_DPFD_CLK_GATE -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE ILK_CLK_FBC -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE DPFCRUNIT_CLOCK_GATE_DISABLE -> ILK_DPFCRUNIT_CLOCK_GATE_DISABLE ILK_DPFC_DIS1 -> ILK_DPFCRUNIT_CLOCK_GATE_DISABLE DPFCUNIT_CLOCK_GATE_DISABLE -> ILK_DPFCUNIT_CLOCK_GATE_DISABLE ILK_DPFC_DIS2 -> ILK_DPFCUNIT_CLOCK_GATE_DISABLE We have a VHRUNIT_CLOCK_GATE_DISABLE define for the pre-ILK DSPCLK_GATE_D. Even if the same bit is used in ILK_DSPCLK_GATE_D, other bits in the register change, so I went with re-defining it, well more precisely rename IVB_VRHUNIT_CLK_GATE, which is not specific to IVB+. So: IVB_VRHUNIT_CLK_GATE -> ILK_VHRUNIT_CLOCK_GATE_DISABLE VHRUNIT_CLOCK_GATE_DISABLE -> ILK_VHRUNIT_CLOCK_GATE_DISABLE (ILK+ code) This commit is only a renaming commit, further commits will clean up the logic. v2: Rename bit 5 and 7 to _ENABLE as setting them to 1 enables clock gating on their respective units, contrary to all of the other bits (Paulo Zanoni) Signed-off-by: Damien Lespiau <damien.lespiau@intel.com> Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-10-19 23:55:41 +07:00
I915_WRITE(ILK_DSPCLK_GATE_D,
I915_READ(ILK_DSPCLK_GATE_D) |
ILK_DPARBUNIT_CLOCK_GATE_ENABLE |
ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
/* WaMbcDriverBootEnable */
I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
GEN6_MBCTL_ENABLE_BOOT_FETCH);
for_each_pipe(pipe) {
I915_WRITE(DSPCNTR(pipe),
I915_READ(DSPCNTR(pipe)) |
DISPPLANE_TRICKLE_FEED_DISABLE);
intel_flush_display_plane(dev_priv, pipe);
}
/* The default value should be 0x200 according to docs, but the two
* platforms I checked have a 0 for this. (Maybe BIOS overrides?) */
I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_DISABLE(0xffff));
I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_ENABLE(GEN6_GT_MODE_HI));
cpt_init_clock_gating(dev);
gen6_check_mch_setup(dev);
}
/*
 * gen7_setup_fixed_func_scheduler - select the HW schedulers in
 * GEN7_FF_THREAD_MODE
 * @dev_priv: i915 device private
 *
 * Clears the scheduler mask field, sets the TS/VS/DS "SCHED_HW" bits and,
 * on Haswell, additionally clears GEN7_FF_VS_REF_CNT_FFME.
 */
static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
{
	uint32_t mode = I915_READ(GEN7_FF_THREAD_MODE);

	mode &= ~GEN7_FF_SCHED_MASK;
	mode |= GEN7_FF_TS_SCHED_HW |
		GEN7_FF_VS_SCHED_HW |
		GEN7_FF_DS_SCHED_HW;

	/* WaVSRefCountFullforceMissDisable */
	if (IS_HASWELL(dev_priv->dev))
		mode &= ~GEN7_FF_VS_REF_CNT_FFME;

	I915_WRITE(GEN7_FF_THREAD_MODE, mode);
}
static void lpt_init_clock_gating(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
/*
* TODO: this bit should only be enabled when really needed, then
* disabled when not needed anymore in order to save power.
*/
if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
I915_WRITE(SOUTH_DSPCLK_GATE_D,
I915_READ(SOUTH_DSPCLK_GATE_D) |
PCH_LP_PARTITION_LEVEL_DISABLE);
}
static void haswell_init_clock_gating(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
int pipe;
I915_WRITE(WM3_LP_ILK, 0);
I915_WRITE(WM2_LP_ILK, 0);
I915_WRITE(WM1_LP_ILK, 0);
/* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
* This implements the WaDisableRCZUnitClockGating workaround.
*/
I915_WRITE(GEN6_UCGCTL2, GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
/* WaApplyL3ControlAndL3ChickenMode requires those two on Ivy Bridge */
I915_WRITE(GEN7_L3CNTLREG1,
GEN7_WA_FOR_GEN7_L3_CONTROL);
I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
GEN7_WA_L3_CHICKEN_MODE);
/* This is required by WaCatErrorRejectionIssue */
I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
for_each_pipe(pipe) {
I915_WRITE(DSPCNTR(pipe),
I915_READ(DSPCNTR(pipe)) |
DISPPLANE_TRICKLE_FEED_DISABLE);
intel_flush_display_plane(dev_priv, pipe);
}
gen7_setup_fixed_func_scheduler(dev_priv);
/* WaDisable4x2SubspanOptimization */
I915_WRITE(CACHE_MODE_1,
_MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
/* WaMbcDriverBootEnable */
I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
GEN6_MBCTL_ENABLE_BOOT_FETCH);
/* WaSwitchSolVfFArbitrationPriority */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
/* XXX: This is a workaround for early silicon revisions and should be
* removed later.
*/
I915_WRITE(WM_DBG,
I915_READ(WM_DBG) |
WM_DBG_DISALLOW_MULTIPLE_LP |
WM_DBG_DISALLOW_SPRITE |
WM_DBG_DISALLOW_MAXFIFO);
lpt_init_clock_gating(dev);
}
static void ivybridge_init_clock_gating(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
int pipe;
uint32_t snpcr;
I915_WRITE(WM3_LP_ILK, 0);
I915_WRITE(WM2_LP_ILK, 0);
I915_WRITE(WM1_LP_ILK, 0);
drm/i915: Consolidate ILK_DSPCLK_GATE and PCH_DSPCLK_GATE Register 0x42020 was defined twice under the names PCH_DSPCLK_GATE_D and ILK_DSPCLK_GATE. This patch consolidate the 2 sets of defines in one. The transforms done are: PCH_DSPCLK_GATE_D -> ILK_DSPCLK_GATE_D ILK_DSPCLK_GATE -> ILK_DSPCLK_GATE_D DPARBUNIT_CLOCK_GATE_DISABLE -> ILK_DPARBUNIT_CLOCK_GATE_DISABLE ILK_DPARB_CLK_GATE -> ILK_DPARBUNIT_CLOCK_GATE_DISABLE DPFDUNIT_CLOCK_GATE_DISABLE -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE ILK_DPFD_CLK_GATE -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE ILK_CLK_FBC -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE DPFCRUNIT_CLOCK_GATE_DISABLE -> ILK_DPFCRUNIT_CLOCK_GATE_DISABLE ILK_DPFC_DIS1 -> ILK_DPFCRUNIT_CLOCK_GATE_DISABLE DPFCUNIT_CLOCK_GATE_DISABLE -> ILK_DPFCUNIT_CLOCK_GATE_DISABLE ILK_DPFC_DIS2 -> ILK_DPFCUNIT_CLOCK_GATE_DISABLE We have a VHRUNIT_CLOCK_GATE_DISABLE define for the pre-ILK DSPCLK_GATE_D. Even if the same bit is used in ILK_DSPCLK_GATE_D, other bits in the register change, so I went with re-defining it, well more precisely rename IVB_VRHUNIT_CLK_GATE, which is not specific to IVB+. So: IVB_VRHUNIT_CLK_GATE -> ILK_VHRUNIT_CLOCK_GATE_DISABLE VHRUNIT_CLOCK_GATE_DISABLE -> ILK_VHRUNIT_CLOCK_GATE_DISABLE (ILK+ code) This commit is only a renaming commit, further commits will clean up the logic. v2: Rename bit 5 and 7 to _ENABLE as setting them to 1 enables clock gating on their respective units, contrary to all of the other bits (Paulo Zanoni) Signed-off-by: Damien Lespiau <damien.lespiau@intel.com> Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-10-19 23:55:41 +07:00
I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
/* WaDisableEarlyCull */
I915_WRITE(_3D_CHICKEN3,
_MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
/* WaDisableBackToBackFlipFix */
I915_WRITE(IVB_CHICKEN3,
CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
CHICKEN3_DGMG_DONE_FIX_DISABLE);
/* WaDisablePSDDualDispatchEnable */
if (IS_IVB_GT1(dev))
I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
_MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
else
I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_GT2,
_MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
/* WaApplyL3ControlAndL3ChickenMode requires those two on Ivy Bridge */
I915_WRITE(GEN7_L3CNTLREG1,
GEN7_WA_FOR_GEN7_L3_CONTROL);
I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
GEN7_WA_L3_CHICKEN_MODE);
if (IS_IVB_GT1(dev))
I915_WRITE(GEN7_ROW_CHICKEN2,
_MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
else
I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
_MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
/* WaForceL3Serialization */
I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
~L3SQ_URB_READ_CAM_MATCH_DISABLE);
/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
* gating disable must be set. Failure to set it results in
* flickering pixels due to Z write ordering failures after
* some amount of runtime in the Mesa "fire" demo, and Unigine
* Sanctuary and Tropics, and apparently anything else with
* alpha test or pixel discard.
*
* According to the spec, bit 11 (RCCUNIT) must also be set,
* but we didn't debug actual testcases to find it out.
*
* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
* This implements the WaDisableRCZUnitClockGating workaround.
*/
I915_WRITE(GEN6_UCGCTL2,
GEN6_RCZUNIT_CLOCK_GATE_DISABLE |
GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
/* This is required by WaCatErrorRejectionIssue */
I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
for_each_pipe(pipe) {
I915_WRITE(DSPCNTR(pipe),
I915_READ(DSPCNTR(pipe)) |
DISPPLANE_TRICKLE_FEED_DISABLE);
intel_flush_display_plane(dev_priv, pipe);
}
/* WaMbcDriverBootEnable */
I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
GEN6_MBCTL_ENABLE_BOOT_FETCH);
gen7_setup_fixed_func_scheduler(dev_priv);
/* WaDisable4x2SubspanOptimization */
I915_WRITE(CACHE_MODE_1,
_MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
snpcr &= ~GEN6_MBC_SNPCR_MASK;
snpcr |= GEN6_MBC_SNPCR_MED;
I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
if (!HAS_PCH_NOP(dev))
cpt_init_clock_gating(dev);
gen6_check_mch_setup(dev);
}
static void valleyview_init_clock_gating(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
int pipe;
I915_WRITE(WM3_LP_ILK, 0);
I915_WRITE(WM2_LP_ILK, 0);
I915_WRITE(WM1_LP_ILK, 0);
drm/i915: Consolidate ILK_DSPCLK_GATE and PCH_DSPCLK_GATE Register 0x42020 was defined twice under the names PCH_DSPCLK_GATE_D and ILK_DSPCLK_GATE. This patch consolidate the 2 sets of defines in one. The transforms done are: PCH_DSPCLK_GATE_D -> ILK_DSPCLK_GATE_D ILK_DSPCLK_GATE -> ILK_DSPCLK_GATE_D DPARBUNIT_CLOCK_GATE_DISABLE -> ILK_DPARBUNIT_CLOCK_GATE_DISABLE ILK_DPARB_CLK_GATE -> ILK_DPARBUNIT_CLOCK_GATE_DISABLE DPFDUNIT_CLOCK_GATE_DISABLE -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE ILK_DPFD_CLK_GATE -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE ILK_CLK_FBC -> ILK_DPFDUNIT_CLOCK_GATE_DISABLE DPFCRUNIT_CLOCK_GATE_DISABLE -> ILK_DPFCRUNIT_CLOCK_GATE_DISABLE ILK_DPFC_DIS1 -> ILK_DPFCRUNIT_CLOCK_GATE_DISABLE DPFCUNIT_CLOCK_GATE_DISABLE -> ILK_DPFCUNIT_CLOCK_GATE_DISABLE ILK_DPFC_DIS2 -> ILK_DPFCUNIT_CLOCK_GATE_DISABLE We have a VHRUNIT_CLOCK_GATE_DISABLE define for the pre-ILK DSPCLK_GATE_D. Even if the same bit is used in ILK_DSPCLK_GATE_D, other bits in the register change, so I went with re-defining it, well more precisely rename IVB_VRHUNIT_CLK_GATE, which is not specific to IVB+. So: IVB_VRHUNIT_CLK_GATE -> ILK_VHRUNIT_CLOCK_GATE_DISABLE VHRUNIT_CLOCK_GATE_DISABLE -> ILK_VHRUNIT_CLOCK_GATE_DISABLE (ILK+ code) This commit is only a renaming commit, further commits will clean up the logic. v2: Rename bit 5 and 7 to _ENABLE as setting them to 1 enables clock gating on their respective units, contrary to all of the other bits (Paulo Zanoni) Signed-off-by: Damien Lespiau <damien.lespiau@intel.com> Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-10-19 23:55:41 +07:00
I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
/* WaDisableEarlyCull */
I915_WRITE(_3D_CHICKEN3,
_MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
/* WaDisableBackToBackFlipFix */
I915_WRITE(IVB_CHICKEN3,
CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
CHICKEN3_DGMG_DONE_FIX_DISABLE);
/* WaDisablePSDDualDispatchEnable */
I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
_MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
/* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */
I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
/* WaApplyL3ControlAndL3ChickenMode requires those two on Ivy Bridge */
I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS);
I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
/* WaForceL3Serialization */
I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
~L3SQ_URB_READ_CAM_MATCH_DISABLE);
/* WaDisableDopClockGating */
I915_WRITE(GEN7_ROW_CHICKEN2,
_MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
/* WaForceL3Serialization */
I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
~L3SQ_URB_READ_CAM_MATCH_DISABLE);
/* This is required by WaCatErrorRejectionIssue */
I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
/* WaMbcDriverBootEnable */
I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
GEN6_MBCTL_ENABLE_BOOT_FETCH);
/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
* gating disable must be set. Failure to set it results in
* flickering pixels due to Z write ordering failures after
* some amount of runtime in the Mesa "fire" demo, and Unigine
* Sanctuary and Tropics, and apparently anything else with
* alpha test or pixel discard.
*
* According to the spec, bit 11 (RCCUNIT) must also be set,
* but we didn't debug actual testcases to find it out.
*
* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
* This implements the WaDisableRCZUnitClockGating workaround.
*
* Also apply WaDisableVDSUnitClockGating and
* WaDisableRCPBUnitClockGating.
*/
I915_WRITE(GEN6_UCGCTL2,
GEN7_VDSUNIT_CLOCK_GATE_DISABLE |
GEN7_TDLUNIT_CLOCK_GATE_DISABLE |
GEN6_RCZUNIT_CLOCK_GATE_DISABLE |
GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
I915_WRITE(GEN7_UCGCTL4, GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
for_each_pipe(pipe) {
I915_WRITE(DSPCNTR(pipe),
I915_READ(DSPCNTR(pipe)) |
DISPPLANE_TRICKLE_FEED_DISABLE);
intel_flush_display_plane(dev_priv, pipe);
}
I915_WRITE(CACHE_MODE_1,
_MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
/*
* WaDisableVLVClockGating_VBIIssue
* Disable clock gating on th GCFG unit to prevent a delay
* in the reporting of vblank events.
*/
I915_WRITE(VLV_GUNIT_CLOCK_GATE, 0xffffffff);
/* Conservative clock gating settings for now */
I915_WRITE(0x9400, 0xffffffff);
I915_WRITE(0x9404, 0xffffffff);
I915_WRITE(0x9408, 0xffffffff);
I915_WRITE(0x940c, 0xffffffff);
I915_WRITE(0x9410, 0xffffffff);
I915_WRITE(0x9414, 0xffffffff);
I915_WRITE(0x9418, 0xffffffff);
}
/*
 * Clock gating setup for G4x.
 *
 * Writes the render, RAM and display clock gate registers, then applies
 * WaDisableRenderCachePipelinedFlush through CACHE_MODE_0.
 */
static void g4x_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t display_units = VRHUNIT_CLOCK_GATE_DISABLE |
				 OVRUNIT_CLOCK_GATE_DISABLE |
				 OVCUNIT_CLOCK_GATE_DISABLE;

	/* GM45 additionally sets the DSS unit disable bit. */
	if (IS_GM45(dev))
		display_units |= DSSUNIT_CLOCK_GATE_DISABLE;

	I915_WRITE(RENCLK_GATE_D1, 0);
	I915_WRITE(RENCLK_GATE_D2,
		   VF_UNIT_CLOCK_GATE_DISABLE |
		   GS_UNIT_CLOCK_GATE_DISABLE |
		   CL_UNIT_CLOCK_GATE_DISABLE);
	I915_WRITE(RAMCLK_GATE_D, 0);
	I915_WRITE(DSPCLK_GATE_D, display_units);

	/* WaDisableRenderCachePipelinedFlush */
	I915_WRITE(CACHE_MODE_0,
		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
}
/*
 * Clock gating setup for Crestline.
 *
 * Sets only the RCC disable bit in RENCLK_GATE_D1 and writes zero to the
 * remaining gate registers (RENCLK_GATE_D2, DSPCLK_GATE_D, RAMCLK_GATE_D)
 * and to the 16-bit DEUC register.
 */
static void crestline_init_clock_gating(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
I915_WRITE(RENCLK_GATE_D2, 0);
I915_WRITE(DSPCLK_GATE_D, 0);
I915_WRITE(RAMCLK_GATE_D, 0);
/* DEUC is accessed with the 16-bit write helper. */
I915_WRITE16(DEUC, 0);
}
static void broadwater_init_clock_gating(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
I965_RCC_CLOCK_GATE_DISABLE |
I965_RCPB_CLOCK_GATE_DISABLE |
I965_ISC_CLOCK_GATE_DISABLE |
I965_FBC_CLOCK_GATE_DISABLE);
I915_WRITE(RENCLK_GATE_D2, 0);
}
/*
 * Clock gating setup for gen3 parts (also installed for Pineview).
 *
 * ORs the PLL-D3-off, GFX clock gating and dot clock gating bits into
 * D_STATE, then adjusts ECOSKPD.
 */
static void gen3_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	const u32 gating_bits = DSTATE_PLL_D3_OFF |
				DSTATE_GFX_CLOCK_GATING |
				DSTATE_DOT_CLOCK_GATING;
	u32 dstate;

	dstate = I915_READ(D_STATE);
	I915_WRITE(D_STATE, dstate | gating_bits);

	if (IS_PINEVIEW(dev))
		I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));

	/* IIR "flip pending" means done if this bit is set */
	I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
}
/*
 * Clock gating setup installed by intel_init_pm() for I85X and I865G:
 * writes SV_CLOCK_GATE_DISABLE to RENCLK_GATE_D1.
 */
static void i85x_init_clock_gating(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
}
/*
 * Clock gating setup used as the final fallback in intel_init_pm():
 * writes OVRUNIT_CLOCK_GATE_DISABLE to DSPCLK_GATE_D.
 */
static void i830_init_clock_gating(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
}
/*
 * Run the platform's clock gating setup through the
 * display.init_clock_gating hook installed by intel_init_pm().
 *
 * intel_init_pm() does not assign the hook on every path (the PCH-split
 * fallback branch, and gen4 parts that are neither Crestline nor
 * Broadwater), so a missing hook is skipped instead of being called.
 * NOTE(review): this assumes an unassigned hook is NULL, i.e. that
 * dev_priv is zero-initialised when allocated - confirm.
 */
void intel_init_clock_gating(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!dev_priv->display.init_clock_gating)
		return;

	dev_priv->display.init_clock_gating(dev);
}
/*
 * Report whether the power well may be relied upon.
 *
 * On Haswell the well counts as in use only when HSW_PWR_WELL_DRIVER reads
 * back exactly the enable request bit plus the state bit, i.e. we asked for
 * it and the hardware reports it enabled.  Every other platform returns
 * true.
 */
bool intel_using_power_well(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!IS_HASWELL(dev))
		return true;

	return I915_READ(HSW_PWR_WELL_DRIVER) ==
	       (HSW_PWR_WELL_ENABLE | HSW_PWR_WELL_STATE);
}
/*
 * Request that the power well be enabled or disabled via
 * HSW_PWR_WELL_DRIVER.
 *
 * Does nothing on platforms without a power well.  A disable request is
 * ignored unless the i915_disable_power_well option is set.  When enabling,
 * waits up to 20 ms for the state bit and logs an error on timeout; a
 * disable is only requested, not waited for.
 */
void intel_set_power_well(struct drm_device *dev, bool enable)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t well_ctl;
	bool powered, requested;

	if (!HAS_POWER_WELL(dev))
		return;

	if (!enable && !i915_disable_power_well)
		return;

	well_ctl = I915_READ(HSW_PWR_WELL_DRIVER);
	powered = well_ctl & HSW_PWR_WELL_STATE;
	requested = well_ctl & HSW_PWR_WELL_ENABLE;

	if (!enable) {
		/* Only drop the request if one is currently posted. */
		if (requested) {
			I915_WRITE(HSW_PWR_WELL_DRIVER, 0);
			DRM_DEBUG_KMS("Requesting to disable the power well\n");
		}
		return;
	}

	if (!requested)
		I915_WRITE(HSW_PWR_WELL_DRIVER, HSW_PWR_WELL_ENABLE);

	if (powered)
		return;

	DRM_DEBUG_KMS("Enabling power well\n");
	if (wait_for((I915_READ(HSW_PWR_WELL_DRIVER) &
		      HSW_PWR_WELL_STATE), 20))
		DRM_ERROR("Timeout enabling power well\n");
}
/*
* Starting with Haswell, we have a "Power Down Well" that can be turned off
* when not needed anymore. We have 4 registers that can request the power well
* to be enabled, and it will only be disabled if none of the registers is
* requesting it to be enabled.
*/
void intel_init_power_well(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
if (!HAS_POWER_WELL(dev))
return;
/* For now, we need the power well to be always enabled. */
intel_set_power_well(dev, true);
/* We're taking over the BIOS, so clear any requests made by it since
* the driver is in charge now. */
if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE)
I915_WRITE(HSW_PWR_WELL_BIOS, 0);
}
/*
 * Set up chip specific power management-related functions.
 *
 * Fills in the dev_priv->display hooks, choosing the implementation by
 * platform:
 *   - FBC: fbc_enabled / enable_fbc / disable_fbc
 *   - watermarks: update_wm, update_sprite_wm, update_linetime_wm,
 *     get_fifo_size
 *   - clock gating: init_clock_gating
 *
 * Where the required latency values cannot be read, update_wm is set to
 * NULL and a message is logged.
 */
void intel_init_pm(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
/* FBC hooks; only installed when the device reports FBC support. */
if (I915_HAS_FBC(dev)) {
if (HAS_PCH_SPLIT(dev)) {
dev_priv->display.fbc_enabled = ironlake_fbc_enabled;
dev_priv->display.enable_fbc = ironlake_enable_fbc;
dev_priv->display.disable_fbc = ironlake_disable_fbc;
} else if (IS_GM45(dev)) {
dev_priv->display.fbc_enabled = g4x_fbc_enabled;
dev_priv->display.enable_fbc = g4x_enable_fbc;
dev_priv->display.disable_fbc = g4x_disable_fbc;
} else if (IS_CRESTLINE(dev)) {
dev_priv->display.fbc_enabled = i8xx_fbc_enabled;
dev_priv->display.enable_fbc = i8xx_enable_fbc;
dev_priv->display.disable_fbc = i8xx_disable_fbc;
}
/* 855GM needs testing */
}
/* For cxsr */
if (IS_PINEVIEW(dev))
i915_pineview_get_mem_freq(dev);
else if (IS_GEN5(dev))
i915_ironlake_get_mem_freq(dev);
/* For FIFO watermark updates */
if (HAS_PCH_SPLIT(dev)) {
if (IS_GEN5(dev)) {
/* The watermark hook is only usable if MLTR_ILK holds a latency. */
if (I915_READ(MLTR_ILK) & ILK_SRLT_MASK)
dev_priv->display.update_wm = ironlake_update_wm;
else {
DRM_DEBUG_KMS("Failed to get proper latency. "
"Disable CxSR\n");
dev_priv->display.update_wm = NULL;
}
dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
} else if (IS_GEN6(dev)) {
if (SNB_READ_WM0_LATENCY()) {
dev_priv->display.update_wm = sandybridge_update_wm;
dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
} else {
DRM_DEBUG_KMS("Failed to read display plane latency. "
"Disable CxSR\n");
dev_priv->display.update_wm = NULL;
}
dev_priv->display.init_clock_gating = gen6_init_clock_gating;
} else if (IS_IVYBRIDGE(dev)) {
if (SNB_READ_WM0_LATENCY()) {
dev_priv->display.update_wm = ivybridge_update_wm;
dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
} else {
DRM_DEBUG_KMS("Failed to read display plane latency. "
"Disable CxSR\n");
dev_priv->display.update_wm = NULL;
}
dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
} else if (IS_HASWELL(dev)) {
/* Haswell reuses the Sandybridge watermark hooks and adds linetime. */
if (SNB_READ_WM0_LATENCY()) {
dev_priv->display.update_wm = sandybridge_update_wm;
dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
dev_priv->display.update_linetime_wm = haswell_update_linetime_wm;
} else {
DRM_DEBUG_KMS("Failed to read display plane latency. "
"Disable CxSR\n");
dev_priv->display.update_wm = NULL;
}
dev_priv->display.init_clock_gating = haswell_init_clock_gating;
} else
/*
 * PCH-split platform matched by none of the cases above.
 * NOTE(review): init_clock_gating is not assigned on this path.
 */
dev_priv->display.update_wm = NULL;
} else if (IS_VALLEYVIEW(dev)) {
dev_priv->display.update_wm = valleyview_update_wm;
dev_priv->display.init_clock_gating =
valleyview_init_clock_gating;
} else if (IS_PINEVIEW(dev)) {
if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev),
dev_priv->is_ddr3,
dev_priv->fsb_freq,
dev_priv->mem_freq)) {
DRM_INFO("failed to find known CxSR latency "
"(found ddr%s fsb freq %d, mem freq %d), "
"disabling CxSR\n",
(dev_priv->is_ddr3 == 1) ? "3" : "2",
dev_priv->fsb_freq, dev_priv->mem_freq);
/* Disable CxSR and never update its watermark again */
pineview_disable_cxsr(dev);
dev_priv->display.update_wm = NULL;
} else
dev_priv->display.update_wm = pineview_update_wm;
/* Pineview shares the gen3 clock gating routine. */
dev_priv->display.init_clock_gating = gen3_init_clock_gating;
} else if (IS_G4X(dev)) {
dev_priv->display.update_wm = g4x_update_wm;
dev_priv->display.init_clock_gating = g4x_init_clock_gating;
} else if (IS_GEN4(dev)) {
dev_priv->display.update_wm = i965_update_wm;
/*
 * NOTE(review): init_clock_gating is only assigned for Crestline and
 * Broadwater in this branch.
 */
if (IS_CRESTLINE(dev))
dev_priv->display.init_clock_gating = crestline_init_clock_gating;
else if (IS_BROADWATER(dev))
dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
} else if (IS_GEN3(dev)) {
dev_priv->display.update_wm = i9xx_update_wm;
dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
dev_priv->display.init_clock_gating = gen3_init_clock_gating;
} else if (IS_I865G(dev)) {
dev_priv->display.update_wm = i830_update_wm;
dev_priv->display.init_clock_gating = i85x_init_clock_gating;
dev_priv->display.get_fifo_size = i830_get_fifo_size;
} else if (IS_I85X(dev)) {
dev_priv->display.update_wm = i9xx_update_wm;
dev_priv->display.get_fifo_size = i85x_get_fifo_size;
dev_priv->display.init_clock_gating = i85x_init_clock_gating;
} else {
/* Everything else: i830 hooks, with a 845G-specific FIFO size query. */
dev_priv->display.update_wm = i830_update_wm;
dev_priv->display.init_clock_gating = i830_init_clock_gating;
if (IS_845G(dev))
dev_priv->display.get_fifo_size = i845_get_fifo_size;
else
dev_priv->display.get_fifo_size = i830_get_fifo_size;
}
}
/*
 * Poll GEN6_GT_THREAD_STATUS_REG for up to 500 us until the core status
 * bits read back as zero, logging an error on timeout.  Haswell uses its
 * own core mask.
 */
static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv)
{
	const u32 core_mask = IS_HASWELL(dev_priv->dev) ?
			      GEN6_GT_THREAD_STATUS_CORE_MASK_HSW :
			      GEN6_GT_THREAD_STATUS_CORE_MASK;

	/*
	 * Workaround for a sporadic read returning 0: wait for the GT
	 * thread to wake up before going any further.
	 */
	if (wait_for_atomic_us((I915_READ_NOTRACE(GEN6_GT_THREAD_STATUS_REG) & core_mask) == 0, 500))
		DRM_ERROR("GT thread status wait timed out\n");
}
/*
 * Clear the FORCEWAKE register and follow it with a posting read of
 * ECOBUS.  Called from intel_gt_reset() on gen6+ (non-Valleyview) parts.
 */
static void __gen6_gt_force_wake_reset(struct drm_i915_private *dev_priv)
{
I915_WRITE_NOTRACE(FORCEWAKE, 0);
POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
}
/*
 * Acquire forcewake through the FORCEWAKE register (installed for gen6 by
 * intel_gt_init()).
 *
 * Sequence: wait for bit 0 of FORCEWAKE_ACK to clear, write 1 to
 * FORCEWAKE, wait for bit 0 of FORCEWAKE_ACK to be set, then call
 * __gen6_gt_wait_for_thread_c0().  Each wait is bounded by
 * FORCEWAKE_ACK_TIMEOUT_MS; a timeout is logged and the sequence carries
 * on regardless.
 */
static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
{
/* An ack left over from an earlier request must clear first. */
if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK) & 1) == 0,
FORCEWAKE_ACK_TIMEOUT_MS))
DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
I915_WRITE_NOTRACE(FORCEWAKE, 1);
POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
/* Now wait for the hardware to acknowledge the new request. */
if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK) & 1),
FORCEWAKE_ACK_TIMEOUT_MS))
DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
__gen6_gt_wait_for_thread_c0(dev_priv);
}
static void __gen6_gt_force_wake_mt_reset(struct drm_i915_private *dev_priv)
{
I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(0xffff));
drm/i915: fix FORCEWAKE posting reads We stopped reading FORCEWAKE for posting reads in commit 8dee3eea3ccd3b6c00a8d3a08dd715d6adf737dd Author: Ben Widawsky <ben@bwidawsk.net> Date: Sat Sep 1 22:59:50 2012 -0700 drm/i915: Never read FORCEWAKE and started using something from the same cacheline instead. On the bug reporter's machine this broke entering rc6 states after a suspend/resume cycle. It turns out reading ECOBUS as posting read worked fine, while GTFIFODBG did not, preventing RC6 states after suspend/resume per the bug report referenced below. It's not entirely clear why, but clearly GTFIFODBG was nowhere near the same cacheline or address range as FORCEWAKE. Trying out various registers for posting reads showed that all tested registers for which NEEDS_FORCE_WAKE() (in i915_drv.c) returns true work. Conversely, most (but not quite all) registers for which NEEDS_FORCE_WAKE() returns false do not work. Details in the referenced bug. Based on the above, add posting reads on ECOBUS where GTFIFODBG was previously relied on. In true cargo cult spirit, add posting reads for FORCEWAKE_VLV writes as well, but instead of ECOBUS, use FORCEWAKE_ACK_VLV which is in the same address range as FORCEWAKE_VLV. v2: Add more details to the commit message. No functional changes. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=52411 Reported-and-tested-by: Alexander Bersenev <bay@hackerdom.ru> CC: Ben Widawsky <ben@bwidawsk.net> Signed-off-by: Jani Nikula <jani.nikula@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: stable@vger.kernel.org [danvet: add cc: stable and make the commit message a bit clearer that this is a regression fix and what exactly broke.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-01-17 15:24:09 +07:00
/* something from same cacheline, but !FORCEWAKE_MT */
POSTING_READ(ECOBUS);
}
static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
{
u32 forcewake_ack;
if (IS_HASWELL(dev_priv->dev))
forcewake_ack = FORCEWAKE_ACK_HSW;
else
forcewake_ack = FORCEWAKE_MT_ACK;
if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & FORCEWAKE_KERNEL) == 0,
FORCEWAKE_ACK_TIMEOUT_MS))
DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
drm/i915: fix FORCEWAKE posting reads We stopped reading FORCEWAKE for posting reads in commit 8dee3eea3ccd3b6c00a8d3a08dd715d6adf737dd Author: Ben Widawsky <ben@bwidawsk.net> Date: Sat Sep 1 22:59:50 2012 -0700 drm/i915: Never read FORCEWAKE and started using something from the same cacheline instead. On the bug reporter's machine this broke entering rc6 states after a suspend/resume cycle. It turns out reading ECOBUS as posting read worked fine, while GTFIFODBG did not, preventing RC6 states after suspend/resume per the bug report referenced below. It's not entirely clear why, but clearly GTFIFODBG was nowhere near the same cacheline or address range as FORCEWAKE. Trying out various registers for posting reads showed that all tested registers for which NEEDS_FORCE_WAKE() (in i915_drv.c) returns true work. Conversely, most (but not quite all) registers for which NEEDS_FORCE_WAKE() returns false do not work. Details in the referenced bug. Based on the above, add posting reads on ECOBUS where GTFIFODBG was previously relied on. In true cargo cult spirit, add posting reads for FORCEWAKE_VLV writes as well, but instead of ECOBUS, use FORCEWAKE_ACK_VLV which is in the same address range as FORCEWAKE_VLV. v2: Add more details to the commit message. No functional changes. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=52411 Reported-and-tested-by: Alexander Bersenev <bay@hackerdom.ru> CC: Ben Widawsky <ben@bwidawsk.net> Signed-off-by: Jani Nikula <jani.nikula@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: stable@vger.kernel.org [danvet: add cc: stable and make the commit message a bit clearer that this is a regression fix and what exactly broke.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-01-17 15:24:09 +07:00
/* something from same cacheline, but !FORCEWAKE_MT */
POSTING_READ(ECOBUS);
if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & FORCEWAKE_KERNEL),
FORCEWAKE_ACK_TIMEOUT_MS))
DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
__gen6_gt_wait_for_thread_c0(dev_priv);
}
/*
 * Take a forcewake reference.
 *
 * The register read function normally does this implicitly.  A sequence
 * that needs the GT to stay powered should call this first and finish with
 * a matching gen6_gt_force_wake_put().
 *
 * References are counted under gt_lock; only the first one invokes the
 * platform's force_wake_get hook.
 */
void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
{
	unsigned long irqflags;

	spin_lock_irqsave(&dev_priv->gt_lock, irqflags);

	dev_priv->forcewake_count++;
	if (dev_priv->forcewake_count == 1)
		dev_priv->gt.force_wake_get(dev_priv);

	spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags);
}
/*
 * Read GTFIFODBG and warn if any CPU error bit is set.  When the warning
 * fires, the error mask is written back to the register (presumably to
 * clear the latched bits - confirm against the register definition).
 */
void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv)
{
	u32 fifo_status;

	fifo_status = I915_READ_NOTRACE(GTFIFODBG);
	if (!WARN(fifo_status & GT_FIFO_CPU_ERROR_MASK,
		  "MMIO read or write has been dropped %x\n", fifo_status))
		return;

	I915_WRITE_NOTRACE(GTFIFODBG, GT_FIFO_CPU_ERROR_MASK);
}
static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
{
I915_WRITE_NOTRACE(FORCEWAKE, 0);
drm/i915: fix FORCEWAKE posting reads We stopped reading FORCEWAKE for posting reads in commit 8dee3eea3ccd3b6c00a8d3a08dd715d6adf737dd Author: Ben Widawsky <ben@bwidawsk.net> Date: Sat Sep 1 22:59:50 2012 -0700 drm/i915: Never read FORCEWAKE and started using something from the same cacheline instead. On the bug reporter's machine this broke entering rc6 states after a suspend/resume cycle. It turns out reading ECOBUS as posting read worked fine, while GTFIFODBG did not, preventing RC6 states after suspend/resume per the bug report referenced below. It's not entirely clear why, but clearly GTFIFODBG was nowhere near the same cacheline or address range as FORCEWAKE. Trying out various registers for posting reads showed that all tested registers for which NEEDS_FORCE_WAKE() (in i915_drv.c) returns true work. Conversely, most (but not quite all) registers for which NEEDS_FORCE_WAKE() returns false do not work. Details in the referenced bug. Based on the above, add posting reads on ECOBUS where GTFIFODBG was previously relied on. In true cargo cult spirit, add posting reads for FORCEWAKE_VLV writes as well, but instead of ECOBUS, use FORCEWAKE_ACK_VLV which is in the same address range as FORCEWAKE_VLV. v2: Add more details to the commit message. No functional changes. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=52411 Reported-and-tested-by: Alexander Bersenev <bay@hackerdom.ru> CC: Ben Widawsky <ben@bwidawsk.net> Signed-off-by: Jani Nikula <jani.nikula@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: stable@vger.kernel.org [danvet: add cc: stable and make the commit message a bit clearer that this is a regression fix and what exactly broke.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-01-17 15:24:09 +07:00
/* something from same cacheline, but !FORCEWAKE */
POSTING_READ(ECOBUS);
gen6_gt_check_fifodbg(dev_priv);
}
static void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv)
{
I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
drm/i915: fix FORCEWAKE posting reads We stopped reading FORCEWAKE for posting reads in commit 8dee3eea3ccd3b6c00a8d3a08dd715d6adf737dd Author: Ben Widawsky <ben@bwidawsk.net> Date: Sat Sep 1 22:59:50 2012 -0700 drm/i915: Never read FORCEWAKE and started using something from the same cacheline instead. On the bug reporter's machine this broke entering rc6 states after a suspend/resume cycle. It turns out reading ECOBUS as posting read worked fine, while GTFIFODBG did not, preventing RC6 states after suspend/resume per the bug report referenced below. It's not entirely clear why, but clearly GTFIFODBG was nowhere near the same cacheline or address range as FORCEWAKE. Trying out various registers for posting reads showed that all tested registers for which NEEDS_FORCE_WAKE() (in i915_drv.c) returns true work. Conversely, most (but not quite all) registers for which NEEDS_FORCE_WAKE() returns false do not work. Details in the referenced bug. Based on the above, add posting reads on ECOBUS where GTFIFODBG was previously relied on. In true cargo cult spirit, add posting reads for FORCEWAKE_VLV writes as well, but instead of ECOBUS, use FORCEWAKE_ACK_VLV which is in the same address range as FORCEWAKE_VLV. v2: Add more details to the commit message. No functional changes. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=52411 Reported-and-tested-by: Alexander Bersenev <bay@hackerdom.ru> CC: Ben Widawsky <ben@bwidawsk.net> Signed-off-by: Jani Nikula <jani.nikula@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: stable@vger.kernel.org [danvet: add cc: stable and make the commit message a bit clearer that this is a regression fix and what exactly broke.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-01-17 15:24:09 +07:00
/* something from same cacheline, but !FORCEWAKE_MT */
POSTING_READ(ECOBUS);
gen6_gt_check_fifodbg(dev_priv);
}
/*
 * Drop a forcewake reference taken with gen6_gt_force_wake_get().
 *
 * The count is decremented under gt_lock; the platform's force_wake_put
 * hook runs only when the count reaches zero.
 */
void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
{
	unsigned long irqflags;

	spin_lock_irqsave(&dev_priv->gt_lock, irqflags);

	dev_priv->forcewake_count--;
	if (dev_priv->forcewake_count == 0)
		dev_priv->gt.force_wake_put(dev_priv);

	spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags);
}
/*
 * Account for one GT FIFO entry, refilling the cached free-entry count from
 * hardware when it has dropped below the reserved threshold.
 *
 * While refilling, GT_FIFO_FREE_ENTRIES is polled in 10 us steps (at most
 * 500 retries) until it exceeds GT_FIFO_NUM_RESERVED_ENTRIES.
 *
 * Returns 0 normally, or 1 (after a WARN) if the FIFO still had no room
 * once the retries ran out.
 */
int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
{
	int timed_out = 0;

	if (dev_priv->gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) {
		int retries = 500;
		u32 free_entries = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);

		while (free_entries <= GT_FIFO_NUM_RESERVED_ENTRIES && retries--) {
			udelay(10);
			free_entries = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
		}

		/* retries only goes negative when the loop ran out of tries. */
		if (WARN_ON(retries < 0 && free_entries <= GT_FIFO_NUM_RESERVED_ENTRIES))
			timed_out = 1;

		dev_priv->gt_fifo_count = free_entries;
	}

	dev_priv->gt_fifo_count--;

	return timed_out;
}
static void vlv_force_wake_reset(struct drm_i915_private *dev_priv)
{
I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(0xffff));
drm/i915: fix FORCEWAKE posting reads We stopped reading FORCEWAKE for posting reads in commit 8dee3eea3ccd3b6c00a8d3a08dd715d6adf737dd Author: Ben Widawsky <ben@bwidawsk.net> Date: Sat Sep 1 22:59:50 2012 -0700 drm/i915: Never read FORCEWAKE and started using something from the same cacheline instead. On the bug reporter's machine this broke entering rc6 states after a suspend/resume cycle. It turns out reading ECOBUS as posting read worked fine, while GTFIFODBG did not, preventing RC6 states after suspend/resume per the bug report referenced below. It's not entirely clear why, but clearly GTFIFODBG was nowhere near the same cacheline or address range as FORCEWAKE. Trying out various registers for posting reads showed that all tested registers for which NEEDS_FORCE_WAKE() (in i915_drv.c) returns true work. Conversely, most (but not quite all) registers for which NEEDS_FORCE_WAKE() returns false do not work. Details in the referenced bug. Based on the above, add posting reads on ECOBUS where GTFIFODBG was previously relied on. In true cargo cult spirit, add posting reads for FORCEWAKE_VLV writes as well, but instead of ECOBUS, use FORCEWAKE_ACK_VLV which is in the same address range as FORCEWAKE_VLV. v2: Add more details to the commit message. No functional changes. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=52411 Reported-and-tested-by: Alexander Bersenev <bay@hackerdom.ru> CC: Ben Widawsky <ben@bwidawsk.net> Signed-off-by: Jani Nikula <jani.nikula@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: stable@vger.kernel.org [danvet: add cc: stable and make the commit message a bit clearer that this is a regression fix and what exactly broke.] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2013-01-17 15:24:09 +07:00
/* something from same cacheline, but !FORCEWAKE_VLV */
POSTING_READ(FORCEWAKE_ACK_VLV);
}
/*
 * Acquire forcewake on Valleyview.
 *
 * Sequence: wait for the FORCEWAKE_KERNEL bit in FORCEWAKE_ACK_VLV to
 * clear, set FORCEWAKE_KERNEL in both FORCEWAKE_VLV and
 * FORCEWAKE_MEDIA_VLV, wait for the bit in FORCEWAKE_ACK_VLV and then in
 * FORCEWAKE_ACK_MEDIA_VLV, and finally call
 * __gen6_gt_wait_for_thread_c0().  Each wait is bounded by
 * FORCEWAKE_ACK_TIMEOUT_MS; a timeout is logged and the sequence carries
 * on regardless.
 */
static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
{
/* Only FORCEWAKE_ACK_VLV is checked for a stale ack; the media ack is not. */
if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL) == 0,
FORCEWAKE_ACK_TIMEOUT_MS))
DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
/* Post both requests before waiting on either ack. */
I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
I915_WRITE_NOTRACE(FORCEWAKE_MEDIA_VLV,
_MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL),
FORCEWAKE_ACK_TIMEOUT_MS))
DRM_ERROR("Timed out waiting for GT to ack forcewake request.\n");
if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_MEDIA_VLV) &
FORCEWAKE_KERNEL),
FORCEWAKE_ACK_TIMEOUT_MS))
DRM_ERROR("Timed out waiting for media to ack forcewake request.\n");
__gen6_gt_wait_for_thread_c0(dev_priv);
}
/*
 * Release forcewake on Valleyview: masked-disable FORCEWAKE_KERNEL in both
 * FORCEWAKE_VLV and FORCEWAKE_MEDIA_VLV, then check GTFIFODBG for dropped
 * MMIO accesses.
 */
static void vlv_force_wake_put(struct drm_i915_private *dev_priv)
{
I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
I915_WRITE_NOTRACE(FORCEWAKE_MEDIA_VLV,
_MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
/* The below doubles as a POSTING_READ */
gen6_gt_check_fifodbg(dev_priv);
}
/*
 * Reset the forcewake registers for the current platform.
 *
 * Valleyview uses its own reset.  Other gen6+ parts reset FORCEWAKE, and
 * Ivybridge/Haswell additionally reset FORCEWAKE_MT.  Nothing is done on
 * pre-gen6 hardware.
 */
void intel_gt_reset(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (IS_VALLEYVIEW(dev)) {
		vlv_force_wake_reset(dev_priv);
		return;
	}

	if (INTEL_INFO(dev)->gen < 6)
		return;

	__gen6_gt_force_wake_reset(dev_priv);
	if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
		__gen6_gt_force_wake_mt_reset(dev_priv);
}
/*
 * One-time GT setup: initialise gt_lock, reset the forcewake registers,
 * install the platform's force_wake_get/force_wake_put hooks and set up
 * the delayed RPS resume work item.
 *
 * No hooks are installed when the device is none of Valleyview, Ivybridge,
 * Haswell or gen6.
 */
void intel_gt_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
spin_lock_init(&dev_priv->gt_lock);
intel_gt_reset(dev);
/* Valleyview is tested first, ahead of the Ivybridge/Haswell and gen6 cases. */
if (IS_VALLEYVIEW(dev)) {
dev_priv->gt.force_wake_get = vlv_force_wake_get;
dev_priv->gt.force_wake_put = vlv_force_wake_put;
} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
dev_priv->gt.force_wake_get = __gen6_gt_force_wake_mt_get;
dev_priv->gt.force_wake_put = __gen6_gt_force_wake_mt_put;
} else if (IS_GEN6(dev)) {
dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get;
dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put;
}
INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
intel_gen6_powersave_work);
}
/*
 * Read a value from the pcode mailbox.
 *
 * @mbox: mailbox command, ORed with GEN6_PCODE_READY to start the request
 * @val:  in: value written to GEN6_PCODE_DATA before the request;
 *        out: value read back from GEN6_PCODE_DATA on success
 *
 * The caller must hold rps.hw_lock (a WARN fires otherwise).
 *
 * Returns 0 on success, -EAGAIN if the mailbox still has GEN6_PCODE_READY
 * set from an earlier request, or -ETIMEDOUT if the request has not
 * completed within 500 ms.
 */
int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)
{
	u32 mailbox;

	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

	mailbox = I915_READ(GEN6_PCODE_MAILBOX);
	if (mailbox & GEN6_PCODE_READY) {
		DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
		return -EAGAIN;
	}

	I915_WRITE(GEN6_PCODE_DATA, *val);
	I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);

	/* Completion is signalled by the READY bit dropping again. */
	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
		     500)) {
		DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
		return -ETIMEDOUT;
	}

	*val = I915_READ(GEN6_PCODE_DATA);
	I915_WRITE(GEN6_PCODE_DATA, 0);

	return 0;
}
/*
 * Write a value through the pcode mailbox.
 *
 * @mbox: mailbox command, ORed with GEN6_PCODE_READY to start the request
 * @val:  value placed in GEN6_PCODE_DATA before the request
 *
 * The caller must hold rps.hw_lock (a WARN fires otherwise).
 *
 * Returns 0 on success, -EAGAIN if the mailbox still has GEN6_PCODE_READY
 * set from an earlier request, or -ETIMEDOUT if the request has not
 * completed within 500 ms.
 */
int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val)
{
	u32 mailbox;

	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

	mailbox = I915_READ(GEN6_PCODE_MAILBOX);
	if (mailbox & GEN6_PCODE_READY) {
		DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
		return -EAGAIN;
	}

	I915_WRITE(GEN6_PCODE_DATA, val);
	I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);

	/* Completion is signalled by the READY bit dropping again. */
	if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
		     500)) {
		DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
		return -ETIMEDOUT;
	}

	I915_WRITE(GEN6_PCODE_DATA, 0);

	return 0;
}
/*
 * Perform one register read or write over the Valleyview IOSF doorbell.
 *
 * @port:   IOSF port the request is addressed to
 * @opcode: PUNIT_OPCODE_REG_READ or PUNIT_OPCODE_REG_WRITE
 * @addr:   register address written to VLV_IOSF_ADDR
 * @val:    write: points to the value to send; read: receives the result
 *
 * The caller must hold rps.hw_lock (a WARN fires otherwise).
 *
 * Returns 0 on success, -EAGAIN if the doorbell is busy on entry, or
 * -ETIMEDOUT if the request has not completed within 5 ms.
 */
static int vlv_punit_rw(struct drm_i915_private *dev_priv, u32 port, u8 opcode,
			u8 addr, u32 *val)
{
	const char *op_name = opcode == PUNIT_OPCODE_REG_READ ? "read" : "write";
	u32 devfn = PCI_DEVFN(2, 0);
	u32 be = 0xf;
	u32 bar = 0;
	u32 cmd;

	cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) |
	      (port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) |
	      (bar << IOSF_BAR_SHIFT);

	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

	if (I915_READ(VLV_IOSF_DOORBELL_REQ) & IOSF_SB_BUSY) {
		DRM_DEBUG_DRIVER("warning: pcode (%s) mailbox access failed\n",
				 op_name);
		return -EAGAIN;
	}

	I915_WRITE(VLV_IOSF_ADDR, addr);
	if (opcode == PUNIT_OPCODE_REG_WRITE)
		I915_WRITE(VLV_IOSF_DATA, *val);
	/* Writing the command word to the doorbell starts the transaction. */
	I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd);

	if (wait_for((I915_READ(VLV_IOSF_DOORBELL_REQ) & IOSF_SB_BUSY) == 0,
		     5)) {
		DRM_ERROR("timeout waiting for pcode %s (%d) to finish\n",
			  op_name, addr);
		return -ETIMEDOUT;
	}

	if (opcode == PUNIT_OPCODE_REG_READ)
		*val = I915_READ(VLV_IOSF_DATA);
	I915_WRITE(VLV_IOSF_DATA, 0);

	return 0;
}
/*
 * Read the register at @addr on the IOSF_PORT_PUNIT port into *@val.
 * Returns whatever vlv_punit_rw() returns (0 on success, negative errno
 * otherwise).
 */
int valleyview_punit_read(struct drm_i915_private *dev_priv, u8 addr, u32 *val)
{
return vlv_punit_rw(dev_priv, IOSF_PORT_PUNIT, PUNIT_OPCODE_REG_READ,
addr, val);
}
/*
 * Write @val to the register at @addr on the IOSF_PORT_PUNIT port.
 * Returns whatever vlv_punit_rw() returns (0 on success, negative errno
 * otherwise).
 */
int valleyview_punit_write(struct drm_i915_private *dev_priv, u8 addr, u32 val)
{
/* vlv_punit_rw() takes the value by pointer, so pass the local copy. */
return vlv_punit_rw(dev_priv, IOSF_PORT_PUNIT, PUNIT_OPCODE_REG_WRITE,
addr, &val);
}
/*
 * Read the register at @addr on the IOSF_PORT_NC port into *@val.
 * Returns whatever vlv_punit_rw() returns (0 on success, negative errno
 * otherwise).
 */
int valleyview_nc_read(struct drm_i915_private *dev_priv, u8 addr, u32 *val)
{
return vlv_punit_rw(dev_priv, IOSF_PORT_NC, PUNIT_OPCODE_REG_READ,
addr, val);
}
/*
 * Convert a frequency opcode into a GPU frequency for the given DDR speed.
 *
 * @ddr_freq: DDR frequency; 800, 1066 and 1333 are recognised
 * @val:      frequency opcode, where 0xbd maps to the base frequency
 *
 * Returns base + (val - 0xbd) * multiplier for the DDR speed, or -1 when
 * @ddr_freq is not one of the recognised values.
 */
int vlv_gpu_freq(int ddr_freq, int val)
{
	int step, offset;

	if (ddr_freq == 800) {
		step = 20;
		offset = 120;
	} else if (ddr_freq == 1066) {
		step = 22;
		offset = 133;
	} else if (ddr_freq == 1333) {
		step = 21;
		offset = 125;
	} else {
		return -1;
	}

	return ((val - 0xbd) * step) + offset;
}
/*
 * Convert a GPU frequency into a frequency opcode for the given DDR speed.
 *
 * @ddr_freq: DDR frequency; 800, 1066 and 1333 are recognised
 * @val:      GPU frequency to convert
 *
 * Computes val / mult - base / mult + 0xbd using integer division and
 * clamps the result to at most 0xea.  Returns -1 when @ddr_freq is not one
 * of the recognised values.
 */
int vlv_freq_opcode(int ddr_freq, int val)
{
	int step, offset, opcode;

	if (ddr_freq == 800) {
		step = 20;
		offset = 120;
	} else if (ddr_freq == 1066) {
		step = 22;
		offset = 133;
	} else if (ddr_freq == 1333) {
		step = 21;
		offset = 125;
	} else {
		return -1;
	}

	/* Both divisions truncate independently, as in the reverse mapping. */
	opcode = val / step;
	opcode -= offset / step;
	opcode += 0xbd;

	return opcode > 0xea ? 0xea : opcode;
}