diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 8df54443ec78..521dbd0d9af8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -75,14 +75,20 @@ struct amdgpu_dpm_thermal { int min_temp; /* high temperature threshold */ int max_temp; + /* edge max emergency(shutdown) temp */ + int max_edge_emergency_temp; /* hotspot low temperature threshold */ int min_hotspot_temp; /* hotspot high temperature critical threshold */ int max_hotspot_crit_temp; + /* hotspot max emergency(shutdown) temp */ + int max_hotspot_emergency_temp; /* memory low temperature threshold */ int min_mem_temp; /* memory high temperature critical threshold */ int max_mem_crit_temp; + /* memory max emergency(shutdown) temp */ + int max_mem_emergency_temp; /* was last interrupt low to high or high to low */ bool high_to_low; /* interrupt source */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 1f78deadb770..7093b4efc3a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1446,6 +1446,32 @@ static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", temp); } +static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int channel = to_sensor_dev_attr(attr)->index; + int temp = 0; + + if (channel >= PP_TEMP_MAX) + return -EINVAL; + + switch (channel) { + case PP_TEMP_JUNCTION: + temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp; + break; + case PP_TEMP_EDGE: + temp = adev->pm.dpm.thermal.max_edge_emergency_temp; + break; + case PP_TEMP_MEM: + temp = adev->pm.dpm.thermal.max_mem_emergency_temp; + break; + } + + return snprintf(buf, PAGE_SIZE, "%d\n", temp); +} + static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, struct device_attribute *attr, char *buf) @@ -2023,6 +2049,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only * + * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius + * - these are supported on SOC15 dGPUs only + * * hwmon interfaces for GPU voltage: * * - in0_input: the voltage on the GPU in millivolts @@ -2072,10 +2101,13 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE); static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION); static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); +static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM); static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0); static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0); static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); @@ -2106,6 +2138,9 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, &sensor_dev_attr_temp3_crit.dev_attr.attr, &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp1_emergency.dev_attr.attr, + &sensor_dev_attr_temp2_emergency.dev_attr.attr, + &sensor_dev_attr_temp3_emergency.dev_attr.attr, &sensor_dev_attr_pwm1.dev_attr.attr, &sensor_dev_attr_pwm1_enable.dev_attr.attr, &sensor_dev_attr_pwm1_min.dev_attr.attr, @@ -2234,7 +2269,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || attr == &sensor_dev_attr_temp3_crit.dev_attr.attr || - attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr)) + attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr || + attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr || + attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr)) return 0; return effective_mode; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index af6ab04130ef..cc57fb953e62 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -228,9 +228,12 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) struct PP_TemperatureRange range = { TEMP_RANGE_MIN, TEMP_RANGE_MAX, - TEMP_RANGE_MIN, TEMP_RANGE_MAX, TEMP_RANGE_MIN, + TEMP_RANGE_MAX, + TEMP_RANGE_MAX, + TEMP_RANGE_MIN, + TEMP_RANGE_MAX, TEMP_RANGE_MAX}; struct amdgpu_device *adev = hwmgr->adev; @@ -245,10 +248,13 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr) adev->pm.dpm.thermal.min_temp = range.min; adev->pm.dpm.thermal.max_temp = range.max; + adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max; adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min; adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max; + adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max; adev->pm.dpm.thermal.min_mem_temp = range.mem_min; adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; + adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max; return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 4e1df44f094b..1422bc4e45d1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -4859,10 +4859,16 @@ static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->max = pp_table->TedgeLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->mem_crit_max = pp_table->ThbmLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 4f63570ea257..60c9f9502e65 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -2534,10 +2534,16 @@ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->max = pp_table->TedgeLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->mem_crit_max = pp_table->ThbmLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 555ff8733b6b..3a9629c907bb 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -3982,10 +3982,16 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, thermal_data->max = pp_table->TedgeLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; thermal_data->mem_crit_max = pp_table->ThbmLimit * PP_TEMPERATURE_UNITS_PER_CENTIGRADES; + thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h index a8988e7d58c6..a5f2227a3971 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h +++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h @@ -124,10 +124,13 @@ struct PP_StateSoftwareAlgorithmBlock { struct PP_TemperatureRange { int min; int max; + int edge_emergency_max; int hotspot_min; int hotspot_crit_max; + int hotspot_emergency_max; int mem_min; int mem_crit_max; + int mem_emergency_max; }; struct PP_StateValidationBlock { diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h index 75a0a2f8bea2..3e30768f9e1c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h @@ -27,14 +27,18 @@ static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] = { - {-273150, 99000, -273150, 99000, -273150, 99000}, - { 120000, 120000, 120000, 120000, 120000, 120000}, + {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, + { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000}, }; static const struct PP_TemperatureRange SMU7ThermalPolicy[] = { - {-273150, 99000, -273150, 99000, -273150, 99000}, - { 120000, 120000, 120000, 120000, 120000, 120000}, + {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, + { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000}, }; +#define CTF_OFFSET_EDGE 5 +#define CTF_OFFSET_HOTSPOT 5 +#define CTF_OFFSET_HBM 5 + #endif