linux_dsm_epyc7002/drivers/thermal/gov_bang_bang.c
Michele Di Giorgio d0b7306d20 thermal: fix race condition when updating cooling device
When multiple thermal zones are bound to the same cooling device, multiple
kernel threads may want to update the cooling device state by calling
thermal_cdev_update(). Having cdev not protected by a mutex can lead to a race
condition. Consider the following situation with two kernel threads k1 and k2:

	    Thread k1				Thread k2
                                    ||
                                    ||  call thermal_cdev_update()
                                    ||      ...
                                    ||      set_cur_state(cdev, target);
    call power_actor_set_power()    ||
        ...                         ||
        instance->target = state;   ||
        cdev->updated = false;      ||
                                    ||      cdev->updated = true;
                                    ||      // completes execution
    call thermal_cdev_update()      ||
        // cdev->updated == true    ||
        return;                     ||
                                    \/
                                    time

k2 has already looped through the thermal instances looking for the deepest
cooling device state and is preempted right before setting cdev->updated to
true. Now, k1 runs, modifies the thermal instance state and sets cdev->updated
to false. Then, k1 is preempted and k2 continues the execution by setting
cdev->updated to true, therefore preventing k1 from performing the update.
Notice that this is not an issue if k2 looks at the instance->target modified by
k1 "after" it is assigned by k1. In fact, in this case the update will happen
anyway and k1 can safely return immediately from thermal_cdev_update().

This may lead to a situation where a thermal governor never updates the cooling
device. For example, this is the case for the step_wise governor: when calling
the function thermal_zone_trip_update(), the governor may always get a new state
equal to the old one (which, however, wasn't notified to the cooling device) and
will therefore skip the update.

CC: Zhang Rui <rui.zhang@intel.com>
CC: Eduardo Valentin <edubezval@gmail.com>
CC: Peter Feuerer <peter@piie.net>
Reported-by: Toby Huang <toby.huang@arm.com>
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
2016-08-08 10:57:39 +08:00

139 lines
3.9 KiB
C

/*
* gov_bang_bang.c - A simple thermal throttling governor using hysteresis
*
* Copyright (C) 2014 Peter Feuerer <peter@piie.net>
*
* Based on step_wise.c with following Copyrights:
* Copyright (C) 2012 Intel Corp
* Copyright (C) 2012 Durgadoss R <durgadoss.r@intel.com>
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
*/
#include <linux/thermal.h>
#include "thermal_core.h"
static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
{
int trip_temp, trip_hyst;
struct thermal_instance *instance;
tz->ops->get_trip_temp(tz, trip, &trip_temp);
if (!tz->ops->get_trip_hyst) {
pr_warn_once("Undefined get_trip_hyst for thermal zone %s - "
"running with default hysteresis zero\n", tz->type);
trip_hyst = 0;
} else
tz->ops->get_trip_hyst(tz, trip, &trip_hyst);
dev_dbg(&tz->device, "Trip%d[temp=%d]:temp=%d:hyst=%d\n",
trip, trip_temp, tz->temperature,
trip_hyst);
mutex_lock(&tz->lock);
list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
if (instance->trip != trip)
continue;
/* in case fan is in initial state, switch the fan off */
if (instance->target == THERMAL_NO_TARGET)
instance->target = 0;
/* in case fan is neither on nor off set the fan to active */
if (instance->target != 0 && instance->target != 1) {
pr_warn("Thermal instance %s controlled by bang-bang has unexpected state: %ld\n",
instance->name, instance->target);
instance->target = 1;
}
/*
* enable fan when temperature exceeds trip_temp and disable
* the fan in case it falls below trip_temp minus hysteresis
*/
if (instance->target == 0 && tz->temperature >= trip_temp)
instance->target = 1;
else if (instance->target == 1 &&
tz->temperature < trip_temp - trip_hyst)
instance->target = 0;
dev_dbg(&instance->cdev->device, "target=%d\n",
(int)instance->target);
mutex_lock(&instance->cdev->lock);
instance->cdev->updated = false; /* cdev needs update */
mutex_unlock(&instance->cdev->lock);
}
mutex_unlock(&tz->lock);
}
/**
* bang_bang_control - controls devices associated with the given zone
* @tz - thermal_zone_device
* @trip - the trip point
*
* Regulation Logic: a two point regulation, deliver cooling state depending
* on the previous state shown in this diagram:
*
* Fan: OFF ON
*
* |
* |
* trip_temp: +---->+
* | | ^
* | | |
* | | Temperature
* (trip_temp - hyst): +<----+
* |
* |
* |
*
* * If the fan is not running and temperature exceeds trip_temp, the fan
* gets turned on.
* * In case the fan is running, temperature must fall below
* (trip_temp - hyst) so that the fan gets turned off again.
*
*/
static int bang_bang_control(struct thermal_zone_device *tz, int trip)
{
struct thermal_instance *instance;
thermal_zone_trip_update(tz, trip);
mutex_lock(&tz->lock);
list_for_each_entry(instance, &tz->thermal_instances, tz_node)
thermal_cdev_update(instance->cdev);
mutex_unlock(&tz->lock);
return 0;
}
static struct thermal_governor thermal_gov_bang_bang = {
.name = "bang_bang",
.throttle = bang_bang_control,
};
int thermal_gov_bang_bang_register(void)
{
return thermal_register_governor(&thermal_gov_bang_bang);
}
void thermal_gov_bang_bang_unregister(void)
{
thermal_unregister_governor(&thermal_gov_bang_bang);
}