2015-06-18 17:06:16 +07:00
|
|
|
/*
|
|
|
|
* Copyright © 2015 Intel Corporation
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
* Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
|
|
|
|
#include <drm/i915_drm.h>
|
|
|
|
|
|
|
|
#include "i915_drv.h"
|
2019-08-06 18:39:33 +07:00
|
|
|
#include "intel_display_types.h"
|
2019-04-29 19:50:11 +07:00
|
|
|
#include "intel_hotplug.h"
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2015-07-02 20:05:28 +07:00
|
|
|
/**
|
|
|
|
* DOC: Hotplug
|
|
|
|
*
|
|
|
|
* Simply put, hotplug occurs when a display is connected to or disconnected
|
|
|
|
* from the system. However, there may be adapters and docking stations and
|
|
|
|
* Display Port short pulses and MST devices involved, complicating matters.
|
|
|
|
*
|
|
|
|
* Hotplug in i915 is handled in many different levels of abstraction.
|
|
|
|
*
|
|
|
|
* The platform dependent interrupt handling code in i915_irq.c enables,
|
|
|
|
* disables, and does preliminary handling of the interrupts. The interrupt
|
|
|
|
* handlers gather the hotplug detect (HPD) information from relevant registers
|
|
|
|
* into a platform independent mask of hotplug pins that have fired.
|
|
|
|
*
|
|
|
|
* The platform independent interrupt handler intel_hpd_irq_handler() in
|
|
|
|
* intel_hotplug.c does hotplug irq storm detection and mitigation, and passes
|
|
|
|
* further processing to appropriate bottom halves (Display Port specific and
|
|
|
|
* regular hotplug).
|
|
|
|
*
|
|
|
|
* The Display Port work function i915_digport_work_func() calls into
|
|
|
|
* intel_dp_hpd_pulse() via hooks, which handles DP short pulses and DP MST long
|
|
|
|
* pulses, with failures and non-MST long pulses triggering regular hotplug
|
|
|
|
* processing on the connector.
|
|
|
|
*
|
|
|
|
* The regular hotplug work function i915_hotplug_work_func() calls connector
|
|
|
|
* detect hooks, and, if connector status changes, triggers sending of hotplug
|
|
|
|
* uevent to userspace via drm_kms_helper_hotplug_event().
|
|
|
|
*
|
|
|
|
* Finally, the userspace is responsible for triggering a modeset upon receiving
|
|
|
|
* the hotplug uevent, disabling or enabling the crtc as needed.
|
|
|
|
*
|
|
|
|
* The hotplug interrupt storm detection and mitigation code keeps track of the
|
|
|
|
* number of interrupts per hotplug pin per a period of time, and if the number
|
|
|
|
* of interrupts exceeds a certain threshold, the interrupt is disabled for a
|
|
|
|
* while before being re-enabled. The intention is to mitigate issues arising
|
|
|
|
* from broken hardware triggering massive amounts of interrupts and grinding
|
|
|
|
* the system to a halt.
|
2015-07-10 14:00:43 +07:00
|
|
|
*
|
|
|
|
* Current implementation expects that hotplug interrupt storm will not be
|
|
|
|
* seen when display port sink is connected, hence on platforms whose DP
|
|
|
|
* callback is handled by i915_digport_work_func reenabling of hpd is not
|
|
|
|
* performed (it was never expected to be disabled in the first place ;) ).
|
|
|
|
* This is specific to DP sinks handled by this routine and any other display
|
|
|
|
* such as HDMI or DVI enabled on the same port will have proper logic since
|
|
|
|
* it will use i915_hotplug_work_func where this logic is handled.
|
2015-07-02 20:05:28 +07:00
|
|
|
*/
|
|
|
|
|
2017-08-12 01:26:50 +07:00
|
|
|
/**
|
2018-01-30 06:22:21 +07:00
|
|
|
* intel_hpd_pin_default - return default pin associated with certain port.
|
|
|
|
* @dev_priv: private driver data pointer
|
2017-08-12 01:26:50 +07:00
|
|
|
* @port: the hpd port to get associated pin
|
|
|
|
*
|
2018-01-30 06:22:21 +07:00
|
|
|
* It is only valid and used by digital port encoder.
|
|
|
|
*
|
2017-08-12 01:26:50 +07:00
|
|
|
* Return pin that is associated with @port and HPD_NONE if no pin is
|
|
|
|
* hard associated with that @port.
|
|
|
|
*/
|
2018-01-30 06:22:21 +07:00
|
|
|
enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv,
|
|
|
|
enum port port)
|
2017-08-12 01:26:50 +07:00
|
|
|
{
|
|
|
|
switch (port) {
|
|
|
|
case PORT_A:
|
|
|
|
return HPD_PORT_A;
|
|
|
|
case PORT_B:
|
|
|
|
return HPD_PORT_B;
|
|
|
|
case PORT_C:
|
|
|
|
return HPD_PORT_C;
|
|
|
|
case PORT_D:
|
|
|
|
return HPD_PORT_D;
|
|
|
|
case PORT_E:
|
|
|
|
return HPD_PORT_E;
|
2018-01-30 06:22:21 +07:00
|
|
|
case PORT_F:
|
|
|
|
if (IS_CNL_WITH_PORT_F(dev_priv))
|
|
|
|
return HPD_PORT_E;
|
2018-03-24 00:24:17 +07:00
|
|
|
return HPD_PORT_F;
|
2019-07-26 06:48:11 +07:00
|
|
|
case PORT_G:
|
|
|
|
return HPD_PORT_G;
|
|
|
|
case PORT_H:
|
|
|
|
return HPD_PORT_H;
|
|
|
|
case PORT_I:
|
|
|
|
return HPD_PORT_I;
|
2017-08-12 01:26:50 +07:00
|
|
|
default:
|
|
|
|
MISSING_CASE(port);
|
|
|
|
return HPD_NONE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-06-18 17:06:16 +07:00
|
|
|
/*
 * Window within which HPD IRQs on a pin are counted for storm detection.
 * NOTE(review): the unit is presumably milliseconds - confirm against the
 * code that consumes this constant.
 */
#define HPD_STORM_DETECT_PERIOD 1000
|
|
|
|
/*
 * NOTE(review): presumably the time a pin stays disabled after a storm
 * before it is re-enabled (two minutes if the unit is milliseconds) -
 * confirm against the code that consumes this constant.
 */
#define HPD_STORM_REENABLE_DELAY (2 * 60 * 1000)
|
2019-07-12 07:53:42 +07:00
|
|
|
/*
 * NOTE(review): presumably the delay before a hotplug detection retry, in
 * the same unit as the constants above - confirm against its users.
 */
#define HPD_RETRY_DELAY 1000
|
2015-06-18 17:06:16 +07:00
|
|
|
|
|
|
|
/**
|
drm/i915: Add short HPD IRQ storm detection for non-MST systems
Unfortunately, it seems that the HPD IRQ storm problem from the early
days of Intel GPUs was never entirely solved, only mostly. Within the
last couple of days, I got a bug report from one of our customers who
had been having issues with their machine suddenly booting up very
slowly after having updated. The amount of time it took to boot went
from around 30 seconds, to over 6 minutes consistently.
After some investigation, I discovered that i915 was reporting massive
amounts of short HPD IRQ spam on this system from the DisplayPort port,
despite there not being anything actually connected. The symptoms would
start with one "long" HPD IRQ being detected at boot:
[ 1.891398] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00440000, dig 0x00440000, pins 0x000000a0
[ 1.891436] [drm:intel_hpd_irq_handler [i915]] digital hpd port B - long
[ 1.891472] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 5 - cnt: 0
[ 1.891508] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - long
[ 1.891544] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 7 - cnt: 0
[ 1.891592] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port B - long
[ 1.891628] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port D - long
…
followed by constant short IRQs afterwards:
[ 1.895091] [drm:intel_encoder_hotplug [i915]] [CONNECTOR:66:DP-1] status updated from unknown to disconnected
[ 1.895129] [drm:i915_hotplug_work_func [i915]] Connector DP-3 (pin 7) received hotplug event.
[ 1.895165] [drm:intel_dp_detect [i915]] [CONNECTOR:72:DP-3]
[ 1.895275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895312] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.895762] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895799] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896239] [drm:intel_dp_aux_xfer [i915]] dp_aux_ch timeout status 0x71450085
[ 1.896293] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896330] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896781] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896817] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.897275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
The customer's system in question has a GM45 GPU, which is apparently
well known for hotplugging storms.
So, workaround this impressively broken hardware by changing the default
HPD storm threshold from 5 to 50. Then, make long IRQs count for 10, and
short IRQs count for 1. This makes it so that 5 long IRQs will trigger
an HPD storm, and on systems with short HPD storm detection 50 short
IRQs will trigger an HPD storm. 50 short IRQs amounts to 100ms of
constant pulsing, which seems like a good middleground between being too
sensitive and not being sensitive enough (which would cause visible
stutters in userspace every time a storm occurs).
And just to be extra safe: we don't enable this by default on systems
with MST support. There's too high of a chance of MST support triggering
storm detection, and systems that are new enough to support MST are a
lot less likely to have issues with IRQ storms anyway.
As a note: this patch was tested using a ThinkPad T450s and a Chamelium
to simulate the short IRQ storms.
Changes since v1:
- Don't use two separate thresholds, just make long IRQs count for 10
each and short IRQs count for 1. This simplifies the code a bit
- Ville Syrjälä
Changes since v2:
- Document @long_hpd in intel_hpd_irq_storm_detect, no functional
changes
Changes since v4:
- Remove !! in long_hpd assignment - Ville Syrjälä
- queue_hp = true - Ville Syrjälä
Signed-off-by: Lyude Paul <lyude@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-6-lyude@redhat.com
2018-11-07 04:30:16 +07:00
|
|
|
* intel_hpd_irq_storm_detect - gather stats and detect HPD IRQ storm on a pin
|
2015-06-18 17:06:16 +07:00
|
|
|
* @dev_priv: private driver data pointer
|
|
|
|
* @pin: the pin to gather stats on
|
drm/i915: Add short HPD IRQ storm detection for non-MST systems
Unfortunately, it seems that the HPD IRQ storm problem from the early
days of Intel GPUs was never entirely solved, only mostly. Within the
last couple of days, I got a bug report from one of our customers who
had been having issues with their machine suddenly booting up very
slowly after having updated. The amount of time it took to boot went
from around 30 seconds, to over 6 minutes consistently.
After some investigation, I discovered that i915 was reporting massive
amounts of short HPD IRQ spam on this system from the DisplayPort port,
despite there not being anything actually connected. The symptoms would
start with one "long" HPD IRQ being detected at boot:
[ 1.891398] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00440000, dig 0x00440000, pins 0x000000a0
[ 1.891436] [drm:intel_hpd_irq_handler [i915]] digital hpd port B - long
[ 1.891472] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 5 - cnt: 0
[ 1.891508] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - long
[ 1.891544] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 7 - cnt: 0
[ 1.891592] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port B - long
[ 1.891628] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port D - long
…
followed by constant short IRQs afterwards:
[ 1.895091] [drm:intel_encoder_hotplug [i915]] [CONNECTOR:66:DP-1] status updated from unknown to disconnected
[ 1.895129] [drm:i915_hotplug_work_func [i915]] Connector DP-3 (pin 7) received hotplug event.
[ 1.895165] [drm:intel_dp_detect [i915]] [CONNECTOR:72:DP-3]
[ 1.895275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895312] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.895762] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895799] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896239] [drm:intel_dp_aux_xfer [i915]] dp_aux_ch timeout status 0x71450085
[ 1.896293] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896330] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896781] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896817] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.897275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
The customer's system in question has a GM45 GPU, which is apparently
well known for hotplugging storms.
So, workaround this impressively broken hardware by changing the default
HPD storm threshold from 5 to 50. Then, make long IRQs count for 10, and
short IRQs count for 1. This makes it so that 5 long IRQs will trigger
an HPD storm, and on systems with short HPD storm detection 50 short
IRQs will trigger an HPD storm. 50 short IRQs amounts to 100ms of
constant pulsing, which seems like a good middleground between being too
sensitive and not being sensitive enough (which would cause visible
stutters in userspace every time a storm occurs).
And just to be extra safe: we don't enable this by default on systems
with MST support. There's too high of a chance of MST support triggering
storm detection, and systems that are new enough to support MST are a
lot less likely to have issues with IRQ storms anyway.
As a note: this patch was tested using a ThinkPad T450s and a Chamelium
to simulate the short IRQ storms.
Changes since v1:
- Don't use two separate thresholds, just make long IRQs count for 10
each and short IRQs count for 1. This simplifies the code a bit
- Ville Syrjälä
Changes since v2:
- Document @long_hpd in intel_hpd_irq_storm_detect, no functional
changes
Changes since v4:
- Remove !! in long_hpd assignment - Ville Syrjälä
- queue_hp = true - Ville Syrjälä
Signed-off-by: Lyude Paul <lyude@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-6-lyude@redhat.com
2018-11-07 04:30:16 +07:00
|
|
|
* @long_hpd: whether the HPD IRQ was long or short
|
2015-06-18 17:06:16 +07:00
|
|
|
*
|
drm/i915: Add short HPD IRQ storm detection for non-MST systems
Unfortunately, it seems that the HPD IRQ storm problem from the early
days of Intel GPUs was never entirely solved, only mostly. Within the
last couple of days, I got a bug report from one of our customers who
had been having issues with their machine suddenly booting up very
slowly after having updated. The amount of time it took to boot went
from around 30 seconds, to over 6 minutes consistently.
After some investigation, I discovered that i915 was reporting massive
amounts of short HPD IRQ spam on this system from the DisplayPort port,
despite there not being anything actually connected. The symptoms would
start with one "long" HPD IRQ being detected at boot:
[ 1.891398] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00440000, dig 0x00440000, pins 0x000000a0
[ 1.891436] [drm:intel_hpd_irq_handler [i915]] digital hpd port B - long
[ 1.891472] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 5 - cnt: 0
[ 1.891508] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - long
[ 1.891544] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 7 - cnt: 0
[ 1.891592] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port B - long
[ 1.891628] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port D - long
…
followed by constant short IRQs afterwards:
[ 1.895091] [drm:intel_encoder_hotplug [i915]] [CONNECTOR:66:DP-1] status updated from unknown to disconnected
[ 1.895129] [drm:i915_hotplug_work_func [i915]] Connector DP-3 (pin 7) received hotplug event.
[ 1.895165] [drm:intel_dp_detect [i915]] [CONNECTOR:72:DP-3]
[ 1.895275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895312] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.895762] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895799] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896239] [drm:intel_dp_aux_xfer [i915]] dp_aux_ch timeout status 0x71450085
[ 1.896293] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896330] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896781] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896817] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.897275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
The customer's system in question has a GM45 GPU, which is apparently
well known for hotplugging storms.
So, workaround this impressively broken hardware by changing the default
HPD storm threshold from 5 to 50. Then, make long IRQs count for 10, and
short IRQs count for 1. This makes it so that 5 long IRQs will trigger
an HPD storm, and on systems with short HPD storm detection 50 short
IRQs will trigger an HPD storm. 50 short IRQs amounts to 100ms of
constant pulsing, which seems like a good middleground between being too
sensitive and not being sensitive enough (which would cause visible
stutters in userspace every time a storm occurs).
And just to be extra safe: we don't enable this by default on systems
with MST support. There's too high of a chance of MST support triggering
storm detection, and systems that are new enough to support MST are a
lot less likely to have issues with IRQ storms anyway.
As a note: this patch was tested using a ThinkPad T450s and a Chamelium
to simulate the short IRQ storms.
Changes since v1:
- Don't use two separate thresholds, just make long IRQs count for 10
each and short IRQs count for 1. This simplifies the code a bit
- Ville Syrjälä
Changes since v2:
- Document @long_hpd in intel_hpd_irq_storm_detect, no functional
changes
Changes since v4:
- Remove !! in long_hpd assignment - Ville Syrjälä
- queue_hp = true - Ville Syrjälä
Signed-off-by: Lyude Paul <lyude@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-6-lyude@redhat.com
2018-11-07 04:30:16 +07:00
|
|
|
* Gather stats about HPD IRQs from the specified @pin, and detect IRQ
|
2015-06-18 17:06:16 +07:00
|
|
|
* storms. Only the pin specific stats and state are changed, the caller is
|
|
|
|
* responsible for further action.
|
|
|
|
*
|
drm/i915: Add short HPD IRQ storm detection for non-MST systems
Unfortunately, it seems that the HPD IRQ storm problem from the early
days of Intel GPUs was never entirely solved, only mostly. Within the
last couple of days, I got a bug report from one of our customers who
had been having issues with their machine suddenly booting up very
slowly after having updated. The amount of time it took to boot went
from around 30 seconds, to over 6 minutes consistently.
After some investigation, I discovered that i915 was reporting massive
amounts of short HPD IRQ spam on this system from the DisplayPort port,
despite there not being anything actually connected. The symptoms would
start with one "long" HPD IRQ being detected at boot:
[ 1.891398] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00440000, dig 0x00440000, pins 0x000000a0
[ 1.891436] [drm:intel_hpd_irq_handler [i915]] digital hpd port B - long
[ 1.891472] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 5 - cnt: 0
[ 1.891508] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - long
[ 1.891544] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 7 - cnt: 0
[ 1.891592] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port B - long
[ 1.891628] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port D - long
…
followed by constant short IRQs afterwards:
[ 1.895091] [drm:intel_encoder_hotplug [i915]] [CONNECTOR:66:DP-1] status updated from unknown to disconnected
[ 1.895129] [drm:i915_hotplug_work_func [i915]] Connector DP-3 (pin 7) received hotplug event.
[ 1.895165] [drm:intel_dp_detect [i915]] [CONNECTOR:72:DP-3]
[ 1.895275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895312] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.895762] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895799] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896239] [drm:intel_dp_aux_xfer [i915]] dp_aux_ch timeout status 0x71450085
[ 1.896293] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896330] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896781] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896817] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.897275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
The customer's system in question has a GM45 GPU, which is apparently
well known for hotplugging storms.
So, workaround this impressively broken hardware by changing the default
HPD storm threshold from 5 to 50. Then, make long IRQs count for 10, and
short IRQs count for 1. This makes it so that 5 long IRQs will trigger
an HPD storm, and on systems with short HPD storm detection 50 short
IRQs will trigger an HPD storm. 50 short IRQs amounts to 100ms of
constant pulsing, which seems like a good middleground between being too
sensitive and not being sensitive enough (which would cause visible
stutters in userspace every time a storm occurs).
And just to be extra safe: we don't enable this by default on systems
with MST support. There's too high of a chance of MST support triggering
storm detection, and systems that are new enough to support MST are a
lot less likely to have issues with IRQ storms anyway.
As a note: this patch was tested using a ThinkPad T450s and a Chamelium
to simulate the short IRQ storms.
Changes since v1:
- Don't use two separate thresholds, just make long IRQs count for 10
each and short IRQs count for 1. This simplifies the code a bit
- Ville Syrjälä
Changes since v2:
- Document @long_hpd in intel_hpd_irq_storm_detect, no functional
changes
Changes since v4:
- Remove !! in long_hpd assignment - Ville Syrjälä
- queue_hp = true - Ville Syrjälä
Signed-off-by: Lyude Paul <lyude@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-6-lyude@redhat.com
2018-11-07 04:30:16 +07:00
|
|
|
* The number of IRQs that are allowed within @HPD_STORM_DETECT_PERIOD is
|
2017-02-04 09:18:25 +07:00
|
|
|
* stored in @dev_priv->hotplug.hpd_storm_threshold which defaults to
|
drm/i915: Add short HPD IRQ storm detection for non-MST systems
Unfortunately, it seems that the HPD IRQ storm problem from the early
days of Intel GPUs was never entirely solved, only mostly. Within the
last couple of days, I got a bug report from one of our customers who
had been having issues with their machine suddenly booting up very
slowly after having updated. The amount of time it took to boot went
from around 30 seconds, to over 6 minutes consistently.
After some investigation, I discovered that i915 was reporting massive
amounts of short HPD IRQ spam on this system from the DisplayPort port,
despite there not being anything actually connected. The symptoms would
start with one "long" HPD IRQ being detected at boot:
[ 1.891398] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00440000, dig 0x00440000, pins 0x000000a0
[ 1.891436] [drm:intel_hpd_irq_handler [i915]] digital hpd port B - long
[ 1.891472] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 5 - cnt: 0
[ 1.891508] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - long
[ 1.891544] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 7 - cnt: 0
[ 1.891592] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port B - long
[ 1.891628] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port D - long
…
followed by constant short IRQs afterwards:
[ 1.895091] [drm:intel_encoder_hotplug [i915]] [CONNECTOR:66:DP-1] status updated from unknown to disconnected
[ 1.895129] [drm:i915_hotplug_work_func [i915]] Connector DP-3 (pin 7) received hotplug event.
[ 1.895165] [drm:intel_dp_detect [i915]] [CONNECTOR:72:DP-3]
[ 1.895275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895312] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.895762] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895799] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896239] [drm:intel_dp_aux_xfer [i915]] dp_aux_ch timeout status 0x71450085
[ 1.896293] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896330] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896781] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896817] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.897275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
The customer's system in question has a GM45 GPU, which is apparently
well known for hotplugging storms.
So, workaround this impressively broken hardware by changing the default
HPD storm threshold from 5 to 50. Then, make long IRQs count for 10, and
short IRQs count for 1. This makes it so that 5 long IRQs will trigger
an HPD storm, and on systems with short HPD storm detection 50 short
IRQs will trigger an HPD storm. 50 short IRQs amounts to 100ms of
constant pulsing, which seems like a good middleground between being too
sensitive and not being sensitive enough (which would cause visible
stutters in userspace every time a storm occurs).
And just to be extra safe: we don't enable this by default on systems
with MST support. There's too high of a chance of MST support triggering
storm detection, and systems that are new enough to support MST are a
lot less likely to have issues with IRQ storms anyway.
As a note: this patch was tested using a ThinkPad T450s and a Chamelium
to simulate the short IRQ storms.
Changes since v1:
- Don't use two separate thresholds, just make long IRQs count for 10
each and short IRQs count for 1. This simplifies the code a bit
- Ville Syrjälä
Changes since v2:
- Document @long_hpd in intel_hpd_irq_storm_detect, no functional
changes
Changes since v4:
- Remove !! in long_hpd assignment - Ville Syrjälä
- queue_hp = true - Ville Syrjälä
Signed-off-by: Lyude Paul <lyude@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-6-lyude@redhat.com
2018-11-07 04:30:16 +07:00
|
|
|
* @HPD_STORM_DEFAULT_THRESHOLD. Long IRQs count as +10 to this threshold, and
|
|
|
|
* short IRQs count as +1. If this threshold is exceeded, it's considered an
|
|
|
|
* IRQ storm and the IRQ state is set to @HPD_MARK_DISABLED.
|
|
|
|
*
|
|
|
|
* By default, most systems will only count long IRQs towards
|
|
|
|
* &dev_priv->hotplug.hpd_storm_threshold. However, some older systems also
|
|
|
|
* suffer from short IRQ storms and must also track these. Because short IRQ
|
|
|
|
* storms are naturally caused by sideband interactions with DP MST devices,
|
|
|
|
* short IRQ detection is only enabled for systems without DP MST support.
|
|
|
|
* Systems which are new enough to support DP MST are far less likely to
|
|
|
|
* suffer from IRQ storms at all, so this is fine.
|
2017-02-04 09:18:25 +07:00
|
|
|
*
|
|
|
|
* The HPD threshold can be controlled through i915_hpd_storm_ctl in debugfs,
|
|
|
|
* and should only be adjusted for automated hotplug testing.
|
2015-06-18 17:06:16 +07:00
|
|
|
*
|
drm/i915: Add short HPD IRQ storm detection for non-MST systems
Unfortunately, it seems that the HPD IRQ storm problem from the early
days of Intel GPUs was never entirely solved, only mostly. Within the
last couple of days, I got a bug report from one of our customers who
had been having issues with their machine suddenly booting up very
slowly after having updated. The amount of time it took to boot went
from around 30 seconds, to over 6 minutes consistently.
After some investigation, I discovered that i915 was reporting massive
amounts of short HPD IRQ spam on this system from the DisplayPort port,
despite there not being anything actually connected. The symptoms would
start with one "long" HPD IRQ being detected at boot:
[ 1.891398] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00440000, dig 0x00440000, pins 0x000000a0
[ 1.891436] [drm:intel_hpd_irq_handler [i915]] digital hpd port B - long
[ 1.891472] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 5 - cnt: 0
[ 1.891508] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - long
[ 1.891544] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 7 - cnt: 0
[ 1.891592] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port B - long
[ 1.891628] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port D - long
…
followed by constant short IRQs afterwards:
[ 1.895091] [drm:intel_encoder_hotplug [i915]] [CONNECTOR:66:DP-1] status updated from unknown to disconnected
[ 1.895129] [drm:i915_hotplug_work_func [i915]] Connector DP-3 (pin 7) received hotplug event.
[ 1.895165] [drm:intel_dp_detect [i915]] [CONNECTOR:72:DP-3]
[ 1.895275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895312] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.895762] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895799] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896239] [drm:intel_dp_aux_xfer [i915]] dp_aux_ch timeout status 0x71450085
[ 1.896293] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896330] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896781] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896817] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.897275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
The customer's system in question has a GM45 GPU, which is apparently
well known for hotplugging storms.
So, workaround this impressively broken hardware by changing the default
HPD storm threshold from 5 to 50. Then, make long IRQs count for 10, and
short IRQs count for 1. This makes it so that 5 long IRQs will trigger
an HPD storm, and on systems with short HPD storm detection 50 short
IRQs will trigger an HPD storm. 50 short IRQs amounts to 100ms of
constant pulsing, which seems like a good middleground between being too
sensitive and not being sensitive enough (which would cause visible
stutters in userspace every time a storm occurs).
And just to be extra safe: we don't enable this by default on systems
with MST support. There's too high of a chance of MST support triggering
storm detection, and systems that are new enough to support MST are a
lot less likely to have issues with IRQ storms anyway.
As a note: this patch was tested using a ThinkPad T450s and a Chamelium
to simulate the short IRQ storms.
Changes since v1:
- Don't use two separate thresholds, just make long IRQs count for 10
each and short IRQs count for 1. This simplifies the code a bit
- Ville Syrjälä
Changes since v2:
- Document @long_hpd in intel_hpd_irq_storm_detect, no functional
changes
Changes since v4:
- Remove !! in long_hpd assignment - Ville Syrjälä
- queue_hp = true - Ville Syrjälä
Signed-off-by: Lyude Paul <lyude@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-6-lyude@redhat.com
2018-11-07 04:30:16 +07:00
|
|
|
* Return true if an IRQ storm was detected on @pin.
|
2015-06-18 17:06:16 +07:00
|
|
|
*/
|
|
|
|
static bool intel_hpd_irq_storm_detect(struct drm_i915_private *dev_priv,
				       enum hpd_pin pin, bool long_hpd)
{
	struct i915_hotplug *hotplug = &dev_priv->hotplug;
	const int limit = hotplug->hpd_storm_threshold;
	unsigned long window_start, window_end;

	/* A zero threshold turns storm detection off altogether. */
	if (limit == 0)
		return false;

	/* Short pulses are only accounted when short storm detection is on. */
	if (!long_hpd && !hotplug->hpd_short_storm_enabled)
		return false;

	window_start = hotplug->stats[pin].last_jiffies;
	window_end = window_start + msecs_to_jiffies(HPD_STORM_DETECT_PERIOD);

	/* Outside the current detection window: open a new one from now. */
	if (!time_in_range(jiffies, window_start, window_end)) {
		hotplug->stats[pin].last_jiffies = jiffies;
		hotplug->stats[pin].count = 0;
	}

	/* Long pulses weigh 10, short pulses weigh 1. */
	hotplug->stats[pin].count += long_hpd ? 10 : 1;

	if (hotplug->stats[pin].count <= limit) {
		DRM_DEBUG_KMS("Received HPD interrupt on PIN %d - cnt: %d\n", pin,
			      hotplug->stats[pin].count);
		return false;
	}

	/* Threshold exceeded: flag the pin so it gets disabled later. */
	hotplug->stats[pin].state = HPD_MARK_DISABLED;
	DRM_DEBUG_KMS("HPD interrupt storm detected on PIN %d\n", pin);

	return true;
}
|
|
|
|
|
2018-11-07 04:30:15 +07:00
|
|
|
static void
|
|
|
|
intel_hpd_irq_storm_switch_to_polling(struct drm_i915_private *dev_priv)
|
2015-06-18 17:06:16 +07:00
|
|
|
{
|
2016-07-05 16:40:23 +07:00
|
|
|
struct drm_device *dev = &dev_priv->drm;
|
2015-06-18 17:06:16 +07:00
|
|
|
struct intel_connector *intel_connector;
|
|
|
|
struct intel_encoder *intel_encoder;
|
|
|
|
struct drm_connector *connector;
|
2017-03-01 16:52:22 +07:00
|
|
|
struct drm_connector_list_iter conn_iter;
|
2015-06-18 17:06:16 +07:00
|
|
|
enum hpd_pin pin;
|
|
|
|
bool hpd_disabled = false;
|
|
|
|
|
2017-03-02 20:28:01 +07:00
|
|
|
lockdep_assert_held(&dev_priv->irq_lock);
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2017-03-01 16:52:22 +07:00
|
|
|
drm_connector_list_iter_begin(dev, &conn_iter);
|
|
|
|
drm_for_each_connector_iter(connector, &conn_iter) {
|
2015-06-18 17:06:16 +07:00
|
|
|
if (connector->polled != DRM_CONNECTOR_POLL_HPD)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
intel_connector = to_intel_connector(connector);
|
|
|
|
intel_encoder = intel_connector->encoder;
|
|
|
|
if (!intel_encoder)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
pin = intel_encoder->hpd_pin;
|
|
|
|
if (pin == HPD_NONE ||
|
|
|
|
dev_priv->hotplug.stats[pin].state != HPD_MARK_DISABLED)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
DRM_INFO("HPD interrupt storm detected on connector %s: "
|
|
|
|
"switching from hotplug detection to polling\n",
|
|
|
|
connector->name);
|
|
|
|
|
|
|
|
dev_priv->hotplug.stats[pin].state = HPD_DISABLED;
|
|
|
|
connector->polled = DRM_CONNECTOR_POLL_CONNECT
|
|
|
|
| DRM_CONNECTOR_POLL_DISCONNECT;
|
|
|
|
hpd_disabled = true;
|
|
|
|
}
|
2017-03-01 16:52:22 +07:00
|
|
|
drm_connector_list_iter_end(&conn_iter);
|
2015-06-18 17:06:16 +07:00
|
|
|
|
|
|
|
/* Enable polling and queue hotplug re-enabling. */
|
|
|
|
if (hpd_disabled) {
|
2017-01-27 09:04:08 +07:00
|
|
|
drm_kms_helper_poll_enable(dev);
|
2015-06-18 17:06:16 +07:00
|
|
|
mod_delayed_work(system_wq, &dev_priv->hotplug.reenable_work,
|
|
|
|
msecs_to_jiffies(HPD_STORM_REENABLE_DELAY));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv =
|
|
|
|
container_of(work, typeof(*dev_priv),
|
|
|
|
hotplug.reenable_work.work);
|
2016-07-05 16:40:23 +07:00
|
|
|
struct drm_device *dev = &dev_priv->drm;
|
2019-01-14 21:21:20 +07:00
|
|
|
intel_wakeref_t wakeref;
|
2018-07-05 23:43:54 +07:00
|
|
|
enum hpd_pin pin;
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2019-06-14 06:21:54 +07:00
|
|
|
wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
|
2015-06-18 17:06:16 +07:00
|
|
|
|
|
|
|
spin_lock_irq(&dev_priv->irq_lock);
|
2018-07-05 23:43:54 +07:00
|
|
|
for_each_hpd_pin(pin) {
|
2015-06-18 17:06:16 +07:00
|
|
|
struct drm_connector *connector;
|
2017-03-01 16:52:22 +07:00
|
|
|
struct drm_connector_list_iter conn_iter;
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2018-07-05 23:43:54 +07:00
|
|
|
if (dev_priv->hotplug.stats[pin].state != HPD_DISABLED)
|
2015-06-18 17:06:16 +07:00
|
|
|
continue;
|
|
|
|
|
2018-07-05 23:43:54 +07:00
|
|
|
dev_priv->hotplug.stats[pin].state = HPD_ENABLED;
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2017-03-01 16:52:22 +07:00
|
|
|
drm_connector_list_iter_begin(dev, &conn_iter);
|
|
|
|
drm_for_each_connector_iter(connector, &conn_iter) {
|
2015-06-18 17:06:16 +07:00
|
|
|
struct intel_connector *intel_connector = to_intel_connector(connector);
|
|
|
|
|
2018-11-07 04:30:13 +07:00
|
|
|
/* Don't check MST ports, they don't have pins */
|
|
|
|
if (!intel_connector->mst_port &&
|
|
|
|
intel_connector->encoder->hpd_pin == pin) {
|
2015-06-18 17:06:16 +07:00
|
|
|
if (connector->polled != intel_connector->polled)
|
|
|
|
DRM_DEBUG_DRIVER("Reenabling HPD on connector %s\n",
|
|
|
|
connector->name);
|
|
|
|
connector->polled = intel_connector->polled;
|
|
|
|
if (!connector->polled)
|
|
|
|
connector->polled = DRM_CONNECTOR_POLL_HPD;
|
|
|
|
}
|
|
|
|
}
|
2017-03-01 16:52:22 +07:00
|
|
|
drm_connector_list_iter_end(&conn_iter);
|
2015-06-18 17:06:16 +07:00
|
|
|
}
|
2017-02-15 20:15:47 +07:00
|
|
|
if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup)
|
2016-05-06 20:48:28 +07:00
|
|
|
dev_priv->display.hpd_irq_setup(dev_priv);
|
2015-06-18 17:06:16 +07:00
|
|
|
spin_unlock_irq(&dev_priv->irq_lock);
|
|
|
|
|
2019-06-14 06:21:54 +07:00
|
|
|
intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
|
2015-06-18 17:06:16 +07:00
|
|
|
}
|
|
|
|
|
2019-07-12 07:53:42 +07:00
|
|
|
enum intel_hotplug_state
|
|
|
|
intel_encoder_hotplug(struct intel_encoder *encoder,
|
|
|
|
struct intel_connector *connector,
|
|
|
|
bool irq_received)
|
2015-06-18 17:06:16 +07:00
|
|
|
{
|
2018-01-18 02:21:46 +07:00
|
|
|
struct drm_device *dev = connector->base.dev;
|
2015-06-18 17:06:16 +07:00
|
|
|
enum drm_connector_status old_status;
|
|
|
|
|
|
|
|
WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
|
2018-01-18 02:21:46 +07:00
|
|
|
old_status = connector->base.status;
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2018-01-18 02:21:46 +07:00
|
|
|
connector->base.status =
|
|
|
|
drm_helper_probe_detect(&connector->base, NULL, false);
|
2017-04-07 01:55:20 +07:00
|
|
|
|
2018-01-18 02:21:46 +07:00
|
|
|
if (old_status == connector->base.status)
|
2019-07-12 07:53:42 +07:00
|
|
|
return INTEL_HOTPLUG_UNCHANGED;
|
2015-06-18 17:06:16 +07:00
|
|
|
|
|
|
|
DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n",
|
2018-01-18 02:21:46 +07:00
|
|
|
connector->base.base.id,
|
|
|
|
connector->base.name,
|
2015-06-18 17:06:16 +07:00
|
|
|
drm_get_connector_status_name(old_status),
|
2018-01-18 02:21:46 +07:00
|
|
|
drm_get_connector_status_name(connector->base.status));
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2019-07-12 07:53:42 +07:00
|
|
|
return INTEL_HOTPLUG_CHANGED;
|
2015-06-18 17:06:16 +07:00
|
|
|
}
|
|
|
|
|
2018-07-05 23:43:53 +07:00
|
|
|
static bool intel_encoder_has_hpd_pulse(struct intel_encoder *encoder)
|
|
|
|
{
|
|
|
|
return intel_encoder_is_dig_port(encoder) &&
|
|
|
|
enc_to_dig_port(&encoder->base)->hpd_pulse != NULL;
|
|
|
|
}
|
|
|
|
|
2015-06-18 17:06:16 +07:00
|
|
|
static void i915_digport_work_func(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv =
|
|
|
|
container_of(work, struct drm_i915_private, hotplug.dig_port_work);
|
|
|
|
u32 long_port_mask, short_port_mask;
|
2018-07-05 23:43:53 +07:00
|
|
|
struct intel_encoder *encoder;
|
2015-06-18 17:06:16 +07:00
|
|
|
u32 old_bits = 0;
|
|
|
|
|
|
|
|
spin_lock_irq(&dev_priv->irq_lock);
|
|
|
|
long_port_mask = dev_priv->hotplug.long_port_mask;
|
|
|
|
dev_priv->hotplug.long_port_mask = 0;
|
|
|
|
short_port_mask = dev_priv->hotplug.short_port_mask;
|
|
|
|
dev_priv->hotplug.short_port_mask = 0;
|
|
|
|
spin_unlock_irq(&dev_priv->irq_lock);
|
|
|
|
|
2018-07-05 23:43:53 +07:00
|
|
|
for_each_intel_encoder(&dev_priv->drm, encoder) {
|
|
|
|
struct intel_digital_port *dig_port;
|
|
|
|
enum port port = encoder->port;
|
|
|
|
bool long_hpd, short_hpd;
|
|
|
|
enum irqreturn ret;
|
|
|
|
|
|
|
|
if (!intel_encoder_has_hpd_pulse(encoder))
|
2015-06-18 17:06:16 +07:00
|
|
|
continue;
|
|
|
|
|
2018-07-05 23:43:53 +07:00
|
|
|
long_hpd = long_port_mask & BIT(port);
|
|
|
|
short_hpd = short_port_mask & BIT(port);
|
|
|
|
|
|
|
|
if (!long_hpd && !short_hpd)
|
|
|
|
continue;
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2018-07-05 23:43:53 +07:00
|
|
|
dig_port = enc_to_dig_port(&encoder->base);
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2018-07-05 23:43:53 +07:00
|
|
|
ret = dig_port->hpd_pulse(dig_port, long_hpd);
|
|
|
|
if (ret == IRQ_NONE) {
|
|
|
|
/* fall back to old school hpd */
|
|
|
|
old_bits |= BIT(encoder->hpd_pin);
|
2015-06-18 17:06:16 +07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (old_bits) {
|
|
|
|
spin_lock_irq(&dev_priv->irq_lock);
|
|
|
|
dev_priv->hotplug.event_bits |= old_bits;
|
|
|
|
spin_unlock_irq(&dev_priv->irq_lock);
|
2019-07-12 07:53:42 +07:00
|
|
|
queue_delayed_work(system_wq, &dev_priv->hotplug.hotplug_work, 0);
|
2015-06-18 17:06:16 +07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Handle hotplug events outside the interrupt handler proper.
|
|
|
|
*/
|
|
|
|
static void i915_hotplug_work_func(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *dev_priv =
|
2019-07-12 07:53:42 +07:00
|
|
|
container_of(work, struct drm_i915_private,
|
|
|
|
hotplug.hotplug_work.work);
|
2016-07-05 16:40:23 +07:00
|
|
|
struct drm_device *dev = &dev_priv->drm;
|
2015-06-18 17:06:16 +07:00
|
|
|
struct intel_connector *intel_connector;
|
|
|
|
struct intel_encoder *intel_encoder;
|
|
|
|
struct drm_connector *connector;
|
2017-03-01 16:52:22 +07:00
|
|
|
struct drm_connector_list_iter conn_iter;
|
2019-07-12 07:53:42 +07:00
|
|
|
u32 changed = 0, retry = 0;
|
2015-06-18 17:06:16 +07:00
|
|
|
u32 hpd_event_bits;
|
2019-07-12 07:53:42 +07:00
|
|
|
u32 hpd_retry_bits;
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2017-03-01 16:52:22 +07:00
|
|
|
mutex_lock(&dev->mode_config.mutex);
|
2015-06-18 17:06:16 +07:00
|
|
|
DRM_DEBUG_KMS("running encoder hotplug functions\n");
|
|
|
|
|
|
|
|
spin_lock_irq(&dev_priv->irq_lock);
|
|
|
|
|
|
|
|
hpd_event_bits = dev_priv->hotplug.event_bits;
|
|
|
|
dev_priv->hotplug.event_bits = 0;
|
2019-07-12 07:53:42 +07:00
|
|
|
hpd_retry_bits = dev_priv->hotplug.retry_bits;
|
|
|
|
dev_priv->hotplug.retry_bits = 0;
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2018-11-07 04:30:15 +07:00
|
|
|
/* Enable polling for connectors which had HPD IRQ storms */
|
|
|
|
intel_hpd_irq_storm_switch_to_polling(dev_priv);
|
2015-06-18 17:06:16 +07:00
|
|
|
|
|
|
|
spin_unlock_irq(&dev_priv->irq_lock);
|
|
|
|
|
2017-03-01 16:52:22 +07:00
|
|
|
drm_connector_list_iter_begin(dev, &conn_iter);
|
|
|
|
drm_for_each_connector_iter(connector, &conn_iter) {
|
2019-07-12 07:53:42 +07:00
|
|
|
u32 hpd_bit;
|
|
|
|
|
2015-06-18 17:06:16 +07:00
|
|
|
intel_connector = to_intel_connector(connector);
|
|
|
|
if (!intel_connector->encoder)
|
|
|
|
continue;
|
|
|
|
intel_encoder = intel_connector->encoder;
|
2019-07-12 07:53:42 +07:00
|
|
|
hpd_bit = BIT(intel_encoder->hpd_pin);
|
|
|
|
if ((hpd_event_bits | hpd_retry_bits) & hpd_bit) {
|
2015-06-18 17:06:16 +07:00
|
|
|
DRM_DEBUG_KMS("Connector %s (pin %i) received hotplug event.\n",
|
|
|
|
connector->name, intel_encoder->hpd_pin);
|
2018-01-18 02:21:46 +07:00
|
|
|
|
2019-07-12 07:53:42 +07:00
|
|
|
switch (intel_encoder->hotplug(intel_encoder,
|
|
|
|
intel_connector,
|
|
|
|
hpd_event_bits & hpd_bit)) {
|
|
|
|
case INTEL_HOTPLUG_UNCHANGED:
|
|
|
|
break;
|
|
|
|
case INTEL_HOTPLUG_CHANGED:
|
|
|
|
changed |= hpd_bit;
|
|
|
|
break;
|
|
|
|
case INTEL_HOTPLUG_RETRY:
|
|
|
|
retry |= hpd_bit;
|
|
|
|
break;
|
|
|
|
}
|
2015-06-18 17:06:16 +07:00
|
|
|
}
|
|
|
|
}
|
2017-03-01 16:52:22 +07:00
|
|
|
drm_connector_list_iter_end(&conn_iter);
|
|
|
|
mutex_unlock(&dev->mode_config.mutex);
|
2015-06-18 17:06:16 +07:00
|
|
|
|
|
|
|
if (changed)
|
|
|
|
drm_kms_helper_hotplug_event(dev);
|
2019-07-12 07:53:42 +07:00
|
|
|
|
|
|
|
/* Remove shared HPD pins that have changed */
|
|
|
|
retry &= ~changed;
|
|
|
|
if (retry) {
|
|
|
|
spin_lock_irq(&dev_priv->irq_lock);
|
|
|
|
dev_priv->hotplug.retry_bits |= retry;
|
|
|
|
spin_unlock_irq(&dev_priv->irq_lock);
|
|
|
|
|
|
|
|
mod_delayed_work(system_wq, &dev_priv->hotplug.hotplug_work,
|
|
|
|
msecs_to_jiffies(HPD_RETRY_DELAY));
|
|
|
|
}
|
2015-06-18 17:06:16 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* intel_hpd_irq_handler - main hotplug irq handler
|
2016-05-06 20:48:28 +07:00
|
|
|
* @dev_priv: drm_i915_private
|
2015-06-18 17:06:16 +07:00
|
|
|
* @pin_mask: a mask of hpd pins that have triggered the irq
|
|
|
|
* @long_mask: a mask of hpd pins that may be long hpd pulses
|
|
|
|
*
|
|
|
|
* This is the main hotplug irq handler for all platforms. The platform specific
|
|
|
|
* irq handlers call the platform specific hotplug irq handlers, which read and
|
|
|
|
* decode the appropriate registers into bitmasks about hpd pins that have
|
|
|
|
* triggered (@pin_mask), and which of those pins may be long pulses
|
|
|
|
* (@long_mask). The @long_mask is ignored if the port corresponding to the pin
|
|
|
|
* is not a digital port.
|
|
|
|
*
|
|
|
|
* Here, we do hotplug irq storm detection and mitigation, and pass further
|
|
|
|
* processing to appropriate bottom halves.
|
|
|
|
*/
|
2016-05-06 20:48:28 +07:00
|
|
|
void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
|
2015-06-18 17:06:16 +07:00
|
|
|
u32 pin_mask, u32 long_mask)
|
|
|
|
{
|
2018-07-05 23:43:53 +07:00
|
|
|
struct intel_encoder *encoder;
|
2015-06-18 17:06:16 +07:00
|
|
|
bool storm_detected = false;
|
|
|
|
bool queue_dig = false, queue_hp = false;
|
2018-11-09 03:04:24 +07:00
|
|
|
u32 long_hpd_pulse_mask = 0;
|
|
|
|
u32 short_hpd_pulse_mask = 0;
|
|
|
|
enum hpd_pin pin;
|
2015-06-18 17:06:16 +07:00
|
|
|
|
|
|
|
if (!pin_mask)
|
|
|
|
return;
|
|
|
|
|
|
|
|
spin_lock(&dev_priv->irq_lock);
|
2018-11-09 03:04:24 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Determine whether ->hpd_pulse() exists for each pin, and
|
|
|
|
* whether we have a short or a long pulse. This is needed
|
|
|
|
* as each pin may have up to two encoders (HDMI and DP) and
|
|
|
|
* only the one of them (DP) will have ->hpd_pulse().
|
|
|
|
*/
|
2018-07-05 23:43:53 +07:00
|
|
|
for_each_intel_encoder(&dev_priv->drm, encoder) {
|
|
|
|
bool has_hpd_pulse = intel_encoder_has_hpd_pulse(encoder);
|
2018-11-09 03:04:24 +07:00
|
|
|
enum port port = encoder->port;
|
|
|
|
bool long_hpd;
|
2018-07-05 23:43:53 +07:00
|
|
|
|
2018-11-09 03:04:24 +07:00
|
|
|
pin = encoder->hpd_pin;
|
2018-07-05 23:43:54 +07:00
|
|
|
if (!(BIT(pin) & pin_mask))
|
2015-06-18 17:06:16 +07:00
|
|
|
continue;
|
|
|
|
|
2018-11-09 03:04:24 +07:00
|
|
|
if (!has_hpd_pulse)
|
|
|
|
continue;
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2018-11-09 03:04:24 +07:00
|
|
|
long_hpd = long_mask & BIT(pin);
|
drm/i915: Add short HPD IRQ storm detection for non-MST systems
Unfortunately, it seems that the HPD IRQ storm problem from the early
days of Intel GPUs was never entirely solved, only mostly. Within the
last couple of days, I got a bug report from one of our customers who
had been having issues with their machine suddenly booting up very
slowly after having updated. The amount of time it took to boot went
from around 30 seconds, to over 6 minutes consistently.
After some investigation, I discovered that i915 was reporting massive
amounts of short HPD IRQ spam on this system from the DisplayPort port,
despite there not being anything actually connected. The symptoms would
start with one "long" HPD IRQ being detected at boot:
[ 1.891398] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00440000, dig 0x00440000, pins 0x000000a0
[ 1.891436] [drm:intel_hpd_irq_handler [i915]] digital hpd port B - long
[ 1.891472] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 5 - cnt: 0
[ 1.891508] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - long
[ 1.891544] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 7 - cnt: 0
[ 1.891592] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port B - long
[ 1.891628] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port D - long
…
followed by constant short IRQs afterwards:
[ 1.895091] [drm:intel_encoder_hotplug [i915]] [CONNECTOR:66:DP-1] status updated from unknown to disconnected
[ 1.895129] [drm:i915_hotplug_work_func [i915]] Connector DP-3 (pin 7) received hotplug event.
[ 1.895165] [drm:intel_dp_detect [i915]] [CONNECTOR:72:DP-3]
[ 1.895275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895312] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.895762] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895799] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896239] [drm:intel_dp_aux_xfer [i915]] dp_aux_ch timeout status 0x71450085
[ 1.896293] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896330] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896781] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896817] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.897275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
The customer's system in question has a GM45 GPU, which is apparently
well known for hotplugging storms.
So, workaround this impressively broken hardware by changing the default
HPD storm threshold from 5 to 50. Then, make long IRQs count for 10, and
short IRQs count for 1. This makes it so that 5 long IRQs will trigger
an HPD storm, and on systems with short HPD storm detection 50 short
IRQs will trigger an HPD storm. 50 short IRQs amounts to 100ms of
constant pulsing, which seems like a good middleground between being too
sensitive and not being sensitive enough (which would cause visible
stutters in userspace every time a storm occurs).
And just to be extra safe: we don't enable this by default on systems
with MST support. There's too high of a chance of MST support triggering
storm detection, and systems that are new enough to support MST are a
lot less likely to have issues with IRQ storms anyway.
As a note: this patch was tested using a ThinkPad T450s and a Chamelium
to simulate the short IRQ storms.
Changes since v1:
- Don't use two separate thresholds, just make long IRQs count for 10
each and short IRQs count for 1. This simplifies the code a bit
- Ville Syrjälä
Changes since v2:
- Document @long_hpd in intel_hpd_irq_storm_detect, no functional
changes
Changes since v4:
- Remove !! in long_hpd assignment - Ville Syrjälä
- queue_hp = true - Ville Syrjälä
Signed-off-by: Lyude Paul <lyude@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-6-lyude@redhat.com
2018-11-07 04:30:16 +07:00
|
|
|
|
2018-11-09 03:04:24 +07:00
|
|
|
DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", port_name(port),
|
|
|
|
long_hpd ? "long" : "short");
|
|
|
|
queue_dig = true;
|
drm/i915: Add short HPD IRQ storm detection for non-MST systems
Unfortunately, it seems that the HPD IRQ storm problem from the early
days of Intel GPUs was never entirely solved, only mostly. Within the
last couple of days, I got a bug report from one of our customers who
had been having issues with their machine suddenly booting up very
slowly after having updated. The amount of time it took to boot went
from around 30 seconds, to over 6 minutes consistently.
After some investigation, I discovered that i915 was reporting massive
amounts of short HPD IRQ spam on this system from the DisplayPort port,
despite there not being anything actually connected. The symptoms would
start with one "long" HPD IRQ being detected at boot:
[ 1.891398] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00440000, dig 0x00440000, pins 0x000000a0
[ 1.891436] [drm:intel_hpd_irq_handler [i915]] digital hpd port B - long
[ 1.891472] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 5 - cnt: 0
[ 1.891508] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - long
[ 1.891544] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 7 - cnt: 0
[ 1.891592] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port B - long
[ 1.891628] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port D - long
…
followed by constant short IRQs afterwards:
[ 1.895091] [drm:intel_encoder_hotplug [i915]] [CONNECTOR:66:DP-1] status updated from unknown to disconnected
[ 1.895129] [drm:i915_hotplug_work_func [i915]] Connector DP-3 (pin 7) received hotplug event.
[ 1.895165] [drm:intel_dp_detect [i915]] [CONNECTOR:72:DP-3]
[ 1.895275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895312] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.895762] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895799] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896239] [drm:intel_dp_aux_xfer [i915]] dp_aux_ch timeout status 0x71450085
[ 1.896293] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896330] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896781] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896817] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.897275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
The customer's system in question has a GM45 GPU, which is apparently
well known for hotplugging storms.
So, workaround this impressively broken hardware by changing the default
HPD storm threshold from 5 to 50. Then, make long IRQs count for 10, and
short IRQs count for 1. This makes it so that 5 long IRQs will trigger
an HPD storm, and on systems with short HPD storm detection 50 short
IRQs will trigger an HPD storm. 50 short IRQs amounts to 100ms of
constant pulsing, which seems like a good middleground between being too
sensitive and not being sensitive enough (which would cause visible
stutters in userspace every time a storm occurs).
And just to be extra safe: we don't enable this by default on systems
with MST support. There's too high of a chance of MST support triggering
storm detection, and systems that are new enough to support MST are a
lot less likely to have issues with IRQ storms anyway.
As a note: this patch was tested using a ThinkPad T450s and a Chamelium
to simulate the short IRQ storms.
Changes since v1:
- Don't use two separate thresholds, just make long IRQs count for 10
each and short IRQs count for 1. This simplifies the code a bit
- Ville Syrjälä
Changes since v2:
- Document @long_hpd in intel_hpd_irq_storm_detect, no functional
changes
Changes since v4:
- Remove !! in long_hpd assignment - Ville Syrjälä
- queue_hp = true - Ville Syrjälä
Signed-off-by: Lyude Paul <lyude@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-6-lyude@redhat.com
2018-11-07 04:30:16 +07:00
|
|
|
|
2018-11-09 03:04:24 +07:00
|
|
|
if (long_hpd) {
|
|
|
|
long_hpd_pulse_mask |= BIT(pin);
|
|
|
|
dev_priv->hotplug.long_port_mask |= BIT(port);
|
|
|
|
} else {
|
|
|
|
short_hpd_pulse_mask |= BIT(pin);
|
|
|
|
dev_priv->hotplug.short_port_mask |= BIT(port);
|
2015-06-18 17:06:16 +07:00
|
|
|
}
|
2018-11-09 03:04:24 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Now process each pin just once */
|
|
|
|
for_each_hpd_pin(pin) {
|
|
|
|
bool long_hpd;
|
|
|
|
|
|
|
|
if (!(BIT(pin) & pin_mask))
|
|
|
|
continue;
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2018-07-05 23:43:54 +07:00
|
|
|
if (dev_priv->hotplug.stats[pin].state == HPD_DISABLED) {
|
2015-06-18 17:06:16 +07:00
|
|
|
/*
|
|
|
|
* On GMCH platforms the interrupt mask bits only
|
|
|
|
* prevent irq generation, not the setting of the
|
|
|
|
* hotplug bits itself. So only WARN about unexpected
|
|
|
|
* interrupts on saner platforms.
|
|
|
|
*/
|
2019-02-05 05:25:38 +07:00
|
|
|
WARN_ONCE(!HAS_GMCH(dev_priv),
|
2018-07-05 23:43:54 +07:00
|
|
|
"Received HPD interrupt on pin %d although disabled\n", pin);
|
2015-06-18 17:06:16 +07:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2018-07-05 23:43:54 +07:00
|
|
|
if (dev_priv->hotplug.stats[pin].state != HPD_ENABLED)
|
2015-06-18 17:06:16 +07:00
|
|
|
continue;
|
|
|
|
|
2018-11-09 03:04:24 +07:00
|
|
|
/*
|
|
|
|
* Delegate to ->hpd_pulse() if one of the encoders for this
|
|
|
|
* pin has it, otherwise let the hotplug_work deal with this
|
|
|
|
* pin directly.
|
|
|
|
*/
|
|
|
|
if (((short_hpd_pulse_mask | long_hpd_pulse_mask) & BIT(pin))) {
|
|
|
|
long_hpd = long_hpd_pulse_mask & BIT(pin);
|
|
|
|
} else {
|
2018-07-05 23:43:54 +07:00
|
|
|
dev_priv->hotplug.event_bits |= BIT(pin);
|
2018-11-09 03:04:24 +07:00
|
|
|
long_hpd = true;
|
2015-06-18 17:06:16 +07:00
|
|
|
queue_hp = true;
|
|
|
|
}
|
|
|
|
|
drm/i915: Add short HPD IRQ storm detection for non-MST systems
Unfortunately, it seems that the HPD IRQ storm problem from the early
days of Intel GPUs was never entirely solved, only mostly. Within the
last couple of days, I got a bug report from one of our customers who
had been having issues with their machine suddenly booting up very
slowly after having updated. The amount of time it took to boot went
from around 30 seconds, to over 6 minutes consistently.
After some investigation, I discovered that i915 was reporting massive
amounts of short HPD IRQ spam on this system from the DisplayPort port,
despite there not being anything actually connected. The symptoms would
start with one "long" HPD IRQ being detected at boot:
[ 1.891398] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00440000, dig 0x00440000, pins 0x000000a0
[ 1.891436] [drm:intel_hpd_irq_handler [i915]] digital hpd port B - long
[ 1.891472] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 5 - cnt: 0
[ 1.891508] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - long
[ 1.891544] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 7 - cnt: 0
[ 1.891592] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port B - long
[ 1.891628] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port D - long
…
followed by constant short IRQs afterwards:
[ 1.895091] [drm:intel_encoder_hotplug [i915]] [CONNECTOR:66:DP-1] status updated from unknown to disconnected
[ 1.895129] [drm:i915_hotplug_work_func [i915]] Connector DP-3 (pin 7) received hotplug event.
[ 1.895165] [drm:intel_dp_detect [i915]] [CONNECTOR:72:DP-3]
[ 1.895275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895312] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.895762] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895799] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896239] [drm:intel_dp_aux_xfer [i915]] dp_aux_ch timeout status 0x71450085
[ 1.896293] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896330] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896781] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896817] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.897275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
The customer's system in question has a GM45 GPU, which is apparently
well known for hotplugging storms.
So, workaround this impressively broken hardware by changing the default
HPD storm threshold from 5 to 50. Then, make long IRQs count for 10, and
short IRQs count for 1. This makes it so that 5 long IRQs will trigger
an HPD storm, and on systems with short HPD storm detection 50 short
IRQs will trigger an HPD storm. 50 short IRQs amounts to 100ms of
constant pulsing, which seems like a good middleground between being too
sensitive and not being sensitive enough (which would cause visible
stutters in userspace every time a storm occurs).
And just to be extra safe: we don't enable this by default on systems
with MST support. There's too high of a chance of MST support triggering
storm detection, and systems that are new enough to support MST are a
lot less likely to have issues with IRQ storms anyway.
As a note: this patch was tested using a ThinkPad T450s and a Chamelium
to simulate the short IRQ storms.
Changes since v1:
- Don't use two separate thresholds, just make long IRQs count for 10
each and short IRQs count for 1. This simplifies the code a bit
- Ville Syrjälä
Changes since v2:
- Document @long_hpd in intel_hpd_irq_storm_detect, no functional
changes
Changes since v4:
- Remove !! in long_hpd assignment - Ville Syrjälä
- queue_hp = true - Ville Syrjälä
Signed-off-by: Lyude Paul <lyude@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-6-lyude@redhat.com
2018-11-07 04:30:16 +07:00
|
|
|
if (intel_hpd_irq_storm_detect(dev_priv, pin, long_hpd)) {
|
2018-07-05 23:43:54 +07:00
|
|
|
dev_priv->hotplug.event_bits &= ~BIT(pin);
|
2015-06-18 17:06:16 +07:00
|
|
|
storm_detected = true;
|
drm/i915: Add short HPD IRQ storm detection for non-MST systems
Unfortunately, it seems that the HPD IRQ storm problem from the early
days of Intel GPUs was never entirely solved, only mostly. Within the
last couple of days, I got a bug report from one of our customers who
had been having issues with their machine suddenly booting up very
slowly after having updated. The amount of time it took to boot went
from around 30 seconds, to over 6 minutes consistently.
After some investigation, I discovered that i915 was reporting massive
amounts of short HPD IRQ spam on this system from the DisplayPort port,
despite there not being anything actually connected. The symptoms would
start with one "long" HPD IRQ being detected at boot:
[ 1.891398] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00440000, dig 0x00440000, pins 0x000000a0
[ 1.891436] [drm:intel_hpd_irq_handler [i915]] digital hpd port B - long
[ 1.891472] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 5 - cnt: 0
[ 1.891508] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - long
[ 1.891544] [drm:intel_hpd_irq_handler [i915]] Received HPD interrupt on PIN 7 - cnt: 0
[ 1.891592] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port B - long
[ 1.891628] [drm:intel_dp_hpd_pulse [i915]] got hpd irq on port D - long
…
followed by constant short IRQs afterwards:
[ 1.895091] [drm:intel_encoder_hotplug [i915]] [CONNECTOR:66:DP-1] status updated from unknown to disconnected
[ 1.895129] [drm:i915_hotplug_work_func [i915]] Connector DP-3 (pin 7) received hotplug event.
[ 1.895165] [drm:intel_dp_detect [i915]] [CONNECTOR:72:DP-3]
[ 1.895275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895312] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.895762] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.895799] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896239] [drm:intel_dp_aux_xfer [i915]] dp_aux_ch timeout status 0x71450085
[ 1.896293] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896330] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.896781] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
[ 1.896817] [drm:intel_hpd_irq_handler [i915]] digital hpd port D - short
[ 1.897275] [drm:intel_get_hpd_pins [i915]] hotplug event received, stat 0x00200000, dig 0x00200000, pins 0x00000080
The customer's system in question has a GM45 GPU, which is apparently
well known for hotplugging storms.
So, workaround this impressively broken hardware by changing the default
HPD storm threshold from 5 to 50. Then, make long IRQs count for 10, and
short IRQs count for 1. This makes it so that 5 long IRQs will trigger
an HPD storm, and on systems with short HPD storm detection 50 short
IRQs will trigger an HPD storm. 50 short IRQs amounts to 100ms of
constant pulsing, which seems like a good middleground between being too
sensitive and not being sensitive enough (which would cause visible
stutters in userspace every time a storm occurs).
And just to be extra safe: we don't enable this by default on systems
with MST support. There's too high of a chance of MST support triggering
storm detection, and systems that are new enough to support MST are a
lot less likely to have issues with IRQ storms anyway.
As a note: this patch was tested using a ThinkPad T450s and a Chamelium
to simulate the short IRQ storms.
Changes since v1:
- Don't use two separate thresholds, just make long IRQs count for 10
each and short IRQs count for 1. This simplifies the code a bit
- Ville Syrjälä
Changes since v2:
- Document @long_hpd in intel_hpd_irq_storm_detect, no functional
changes
Changes since v4:
- Remove !! in long_hpd assignment - Ville Syrjälä
- queue_hp = true - Ville Syrjälä
Signed-off-by: Lyude Paul <lyude@redhat.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-6-lyude@redhat.com
2018-11-07 04:30:16 +07:00
|
|
|
queue_hp = true;
|
2015-06-18 17:06:16 +07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-07 04:30:15 +07:00
|
|
|
/*
|
|
|
|
* Disable any IRQs that storms were detected on. Polling enablement
|
|
|
|
* happens later in our hotplug work.
|
|
|
|
*/
|
2017-02-15 20:15:47 +07:00
|
|
|
if (storm_detected && dev_priv->display_irqs_enabled)
|
2016-05-06 20:48:28 +07:00
|
|
|
dev_priv->display.hpd_irq_setup(dev_priv);
|
2015-06-18 17:06:16 +07:00
|
|
|
spin_unlock(&dev_priv->irq_lock);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Our hotplug handler can grab modeset locks (by calling down into the
|
|
|
|
* fb helpers). Hence it must not be run on our own dev-priv->wq work
|
|
|
|
* queue for otherwise the flush_work in the pageflip code will
|
|
|
|
* deadlock.
|
|
|
|
*/
|
|
|
|
if (queue_dig)
|
|
|
|
queue_work(dev_priv->hotplug.dp_wq, &dev_priv->hotplug.dig_port_work);
|
|
|
|
if (queue_hp)
|
2019-07-12 07:53:42 +07:00
|
|
|
queue_delayed_work(system_wq, &dev_priv->hotplug.hotplug_work, 0);
|
2015-06-18 17:06:16 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* intel_hpd_init - initializes and enables hpd support
|
|
|
|
* @dev_priv: i915 device instance
|
|
|
|
*
|
|
|
|
* This function enables the hotplug support. It requires that interrupts have
|
|
|
|
* already been enabled with intel_irq_init_hw(). From this point on hotplug and
|
|
|
|
* poll request can run concurrently to other code, so locking rules must be
|
|
|
|
* obeyed.
|
|
|
|
*
|
|
|
|
* This is a separate step from interrupt enabling to simplify the locking rules
|
|
|
|
* in the driver load and resume code.
|
2016-06-22 04:03:44 +07:00
|
|
|
*
|
|
|
|
* Also see: intel_hpd_poll_init(), which enables connector polling
|
2015-06-18 17:06:16 +07:00
|
|
|
*/
|
|
|
|
void intel_hpd_init(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for_each_hpd_pin(i) {
|
|
|
|
dev_priv->hotplug.stats[i].count = 0;
|
|
|
|
dev_priv->hotplug.stats[i].state = HPD_ENABLED;
|
|
|
|
}
|
2016-06-22 04:03:44 +07:00
|
|
|
|
|
|
|
WRITE_ONCE(dev_priv->hotplug.poll_enabled, false);
|
|
|
|
schedule_work(&dev_priv->hotplug.poll_init_work);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Interrupt setup is already guaranteed to be single-threaded, this is
|
|
|
|
* just to make the assert_spin_locked checks happy.
|
|
|
|
*/
|
2017-02-15 20:15:47 +07:00
|
|
|
if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) {
|
|
|
|
spin_lock_irq(&dev_priv->irq_lock);
|
|
|
|
if (dev_priv->display_irqs_enabled)
|
|
|
|
dev_priv->display.hpd_irq_setup(dev_priv);
|
|
|
|
spin_unlock_irq(&dev_priv->irq_lock);
|
|
|
|
}
|
2016-06-22 04:03:44 +07:00
|
|
|
}
|
|
|
|
|
2016-08-17 18:09:06 +07:00
|
|
|
static void i915_hpd_poll_init_work(struct work_struct *work)
|
|
|
|
{
|
2016-06-22 04:03:44 +07:00
|
|
|
struct drm_i915_private *dev_priv =
|
|
|
|
container_of(work, struct drm_i915_private,
|
|
|
|
hotplug.poll_init_work);
|
|
|
|
struct drm_device *dev = &dev_priv->drm;
|
|
|
|
struct drm_connector *connector;
|
2017-03-01 16:52:22 +07:00
|
|
|
struct drm_connector_list_iter conn_iter;
|
2016-06-22 04:03:44 +07:00
|
|
|
bool enabled;
|
|
|
|
|
|
|
|
mutex_lock(&dev->mode_config.mutex);
|
|
|
|
|
|
|
|
enabled = READ_ONCE(dev_priv->hotplug.poll_enabled);
|
|
|
|
|
2017-03-01 16:52:22 +07:00
|
|
|
drm_connector_list_iter_begin(dev, &conn_iter);
|
|
|
|
drm_for_each_connector_iter(connector, &conn_iter) {
|
2016-06-22 04:03:44 +07:00
|
|
|
struct intel_connector *intel_connector =
|
|
|
|
to_intel_connector(connector);
|
2015-06-18 17:06:16 +07:00
|
|
|
connector->polled = intel_connector->polled;
|
2016-01-07 22:43:28 +07:00
|
|
|
|
|
|
|
/* MST has a dynamic intel_connector->encoder and it's reprobing
|
|
|
|
* is all handled by the MST helpers. */
|
2015-06-18 17:06:16 +07:00
|
|
|
if (intel_connector->mst_port)
|
2016-01-07 22:43:28 +07:00
|
|
|
continue;
|
|
|
|
|
2016-11-07 16:29:20 +07:00
|
|
|
if (!connector->polled && I915_HAS_HOTPLUG(dev_priv) &&
|
2016-06-22 04:03:44 +07:00
|
|
|
intel_connector->encoder->hpd_pin > HPD_NONE) {
|
|
|
|
connector->polled = enabled ?
|
|
|
|
DRM_CONNECTOR_POLL_CONNECT |
|
|
|
|
DRM_CONNECTOR_POLL_DISCONNECT :
|
|
|
|
DRM_CONNECTOR_POLL_HPD;
|
|
|
|
}
|
2015-06-18 17:06:16 +07:00
|
|
|
}
|
2017-03-01 16:52:22 +07:00
|
|
|
drm_connector_list_iter_end(&conn_iter);
|
2015-06-18 17:06:16 +07:00
|
|
|
|
2016-06-22 04:03:44 +07:00
|
|
|
if (enabled)
|
2017-01-27 09:04:08 +07:00
|
|
|
drm_kms_helper_poll_enable(dev);
|
2016-06-22 04:03:44 +07:00
|
|
|
|
|
|
|
mutex_unlock(&dev->mode_config.mutex);
|
|
|
|
|
2015-06-18 17:06:16 +07:00
|
|
|
/*
|
2016-06-22 04:03:44 +07:00
|
|
|
* We might have missed any hotplugs that happened while we were
|
|
|
|
* in the middle of disabling polling
|
2015-06-18 17:06:16 +07:00
|
|
|
*/
|
2016-06-22 04:03:44 +07:00
|
|
|
if (!enabled)
|
|
|
|
drm_helper_hpd_irq_event(dev);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* intel_hpd_poll_init - enables/disables polling for connectors with hpd
|
|
|
|
* @dev_priv: i915 device instance
|
|
|
|
*
|
|
|
|
* This function enables polling for all connectors, regardless of whether or
|
|
|
|
* not they support hotplug detection. Under certain conditions HPD may not be
|
|
|
|
* functional. On most Intel GPUs, this happens when we enter runtime suspend.
|
|
|
|
* On Valleyview and Cherryview systems, this also happens when we shut off all
|
|
|
|
* of the powerwells.
|
|
|
|
*
|
|
|
|
* Since this function can get called in contexts where we're already holding
|
|
|
|
* dev->mode_config.mutex, we do the actual hotplug enabling in a seperate
|
|
|
|
* worker.
|
|
|
|
*
|
|
|
|
* Also see: intel_hpd_init(), which restores hpd handling.
|
|
|
|
*/
|
|
|
|
void intel_hpd_poll_init(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
|
|
|
WRITE_ONCE(dev_priv->hotplug.poll_enabled, true);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We might already be holding dev->mode_config.mutex, so do this in a
|
|
|
|
* seperate worker
|
|
|
|
* As well, there's no issue if we race here since we always reschedule
|
|
|
|
* this worker anyway
|
|
|
|
*/
|
|
|
|
schedule_work(&dev_priv->hotplug.poll_init_work);
|
2015-06-18 17:06:16 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
void intel_hpd_init_work(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
2019-07-12 07:53:42 +07:00
|
|
|
INIT_DELAYED_WORK(&dev_priv->hotplug.hotplug_work,
|
|
|
|
i915_hotplug_work_func);
|
2015-06-18 17:06:16 +07:00
|
|
|
INIT_WORK(&dev_priv->hotplug.dig_port_work, i915_digport_work_func);
|
2016-06-22 04:03:44 +07:00
|
|
|
INIT_WORK(&dev_priv->hotplug.poll_init_work, i915_hpd_poll_init_work);
|
2015-06-18 17:06:16 +07:00
|
|
|
INIT_DELAYED_WORK(&dev_priv->hotplug.reenable_work,
|
|
|
|
intel_hpd_irq_storm_reenable_work);
|
|
|
|
}
|
|
|
|
|
|
|
|
void intel_hpd_cancel_work(struct drm_i915_private *dev_priv)
|
|
|
|
{
|
|
|
|
spin_lock_irq(&dev_priv->irq_lock);
|
|
|
|
|
|
|
|
dev_priv->hotplug.long_port_mask = 0;
|
|
|
|
dev_priv->hotplug.short_port_mask = 0;
|
|
|
|
dev_priv->hotplug.event_bits = 0;
|
2019-07-12 07:53:42 +07:00
|
|
|
dev_priv->hotplug.retry_bits = 0;
|
2015-06-18 17:06:16 +07:00
|
|
|
|
|
|
|
spin_unlock_irq(&dev_priv->irq_lock);
|
|
|
|
|
|
|
|
cancel_work_sync(&dev_priv->hotplug.dig_port_work);
|
2019-07-12 07:53:42 +07:00
|
|
|
cancel_delayed_work_sync(&dev_priv->hotplug.hotplug_work);
|
2016-06-22 04:03:44 +07:00
|
|
|
cancel_work_sync(&dev_priv->hotplug.poll_init_work);
|
2015-06-18 17:06:16 +07:00
|
|
|
cancel_delayed_work_sync(&dev_priv->hotplug.reenable_work);
|
|
|
|
}
|
2016-06-22 04:03:43 +07:00
|
|
|
|
|
|
|
bool intel_hpd_disable(struct drm_i915_private *dev_priv, enum hpd_pin pin)
|
|
|
|
{
|
|
|
|
bool ret = false;
|
|
|
|
|
|
|
|
if (pin == HPD_NONE)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
spin_lock_irq(&dev_priv->irq_lock);
|
|
|
|
if (dev_priv->hotplug.stats[pin].state == HPD_ENABLED) {
|
|
|
|
dev_priv->hotplug.stats[pin].state = HPD_DISABLED;
|
|
|
|
ret = true;
|
|
|
|
}
|
|
|
|
spin_unlock_irq(&dev_priv->irq_lock);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void intel_hpd_enable(struct drm_i915_private *dev_priv, enum hpd_pin pin)
|
|
|
|
{
|
|
|
|
if (pin == HPD_NONE)
|
|
|
|
return;
|
|
|
|
|
|
|
|
spin_lock_irq(&dev_priv->irq_lock);
|
|
|
|
dev_priv->hotplug.stats[pin].state = HPD_ENABLED;
|
|
|
|
spin_unlock_irq(&dev_priv->irq_lock);
|
|
|
|
}
|