drm/amd/powerplay: expose current hotspot and memory temperatures V2

Two new hwmon interfaces(temp2_input and temp3_input) are added.
They are supported on SOC15 dGPUs only.

- V2: correct thermal sensor output

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Evan Quan 2019-04-18 13:51:53 +08:00 committed by Alex Deucher
parent ada2b8f1c8
commit a34d1166b4
5 changed files with 90 additions and 9 deletions

View File

@ -1382,6 +1382,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
struct drm_device *ddev = adev->ddev;
int channel = to_sensor_dev_attr(attr)->index;
int r, temp, size = sizeof(temp);
/* Can't get temperature when the card is off */
@ -1389,11 +1390,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
/* get the temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
if (channel >= PP_TEMP_MAX)
return -EINVAL;
switch (channel) {
case PP_TEMP_JUNCTION:
/* get current junction temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
(void *)&temp, &size);
if (r)
return r;
break;
case PP_TEMP_EDGE:
/* get current edge temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
(void *)&temp, &size);
if (r)
return r;
break;
case PP_TEMP_MEM:
/* get current memory temperature */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
(void *)&temp, &size);
if (r)
return r;
break;
}
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
@ -2041,7 +2063,8 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
* hwmon interfaces for GPU temperature:
*
* - temp1_input: the on die GPU temperature in millidegrees Celsius
* - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius
* - temp2_input and temp3_input are supported on SOC15 dGPUs only
*
* - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
* - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
@ -2098,13 +2121,15 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
*
*/
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE);
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION);
static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1);
static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM);
static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
@ -2134,8 +2159,10 @@ static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_temp1_input.dev_attr.attr,
&sensor_dev_attr_temp1_crit.dev_attr.attr,
&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
&sensor_dev_attr_temp2_input.dev_attr.attr,
&sensor_dev_attr_temp2_crit.dev_attr.attr,
&sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
&sensor_dev_attr_temp3_input.dev_attr.attr,
&sensor_dev_attr_temp3_crit.dev_attr.attr,
&sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
&sensor_dev_attr_temp1_emergency.dev_attr.attr,
@ -2272,7 +2299,9 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr))
attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
attr == &sensor_dev_attr_temp3_input.dev_attr.attr))
return 0;
return effective_mode;

View File

@ -111,6 +111,9 @@ enum amd_pp_sensors {
AMDGPU_PP_SENSOR_GPU_LOAD,
AMDGPU_PP_SENSOR_GFX_MCLK,
AMDGPU_PP_SENSOR_GPU_TEMP,
AMDGPU_PP_SENSOR_EDGE_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP,
AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
AMDGPU_PP_SENSOR_MEM_TEMP,
AMDGPU_PP_SENSOR_VCE_POWER,
AMDGPU_PP_SENSOR_UVD_POWER,
AMDGPU_PP_SENSOR_GPU_POWER,

View File

@ -3785,6 +3785,18 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*((uint32_t *)value) = vega10_thermal_get_temperature(hwmgr);
*size = 4;
break;
case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHotspot);
*((uint32_t *)value) = smum_get_argument(hwmgr) *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
*size = 4;
break;
case AMDGPU_PP_SENSOR_MEM_TEMP:
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM);
*((uint32_t *)value) = smum_get_argument(hwmgr) *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
*size = 4;
break;
case AMDGPU_PP_SENSOR_UVD_POWER:
*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
*size = 4;

View File

@ -1338,6 +1338,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
void *value, int *size)
{
struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
SmuMetrics_t metrics_table;
int ret = 0;
switch (idx) {
@ -1360,6 +1361,24 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
*((uint32_t *)value) = vega12_thermal_get_temperature(hwmgr);
*size = 4;
break;
case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
ret = vega12_get_metrics_table(hwmgr, &metrics_table);
if (ret)
return ret;
*((uint32_t *)value) = metrics_table.TemperatureHotspot *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
*size = 4;
break;
case AMDGPU_PP_SENSOR_MEM_TEMP:
ret = vega12_get_metrics_table(hwmgr, &metrics_table);
if (ret)
return ret;
*((uint32_t *)value) = metrics_table.TemperatureHBM *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
*size = 4;
break;
case AMDGPU_PP_SENSOR_UVD_POWER:
*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
*size = 4;

View File

@ -2138,10 +2138,28 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,
if (!ret)
*size = 4;
break;
case AMDGPU_PP_SENSOR_GPU_TEMP:
case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
*((uint32_t *)value) = vega20_thermal_get_temperature(hwmgr);
*size = 4;
break;
case AMDGPU_PP_SENSOR_EDGE_TEMP:
ret = vega20_get_metrics_table(hwmgr, &metrics_table);
if (ret)
return ret;
*((uint32_t *)value) = metrics_table.TemperatureEdge *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
*size = 4;
break;
case AMDGPU_PP_SENSOR_MEM_TEMP:
ret = vega20_get_metrics_table(hwmgr, &metrics_table);
if (ret)
return ret;
*((uint32_t *)value) = metrics_table.TemperatureHBM *
PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
*size = 4;
break;
case AMDGPU_PP_SENSOR_UVD_POWER:
*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
*size = 4;