linux_dsm_epyc7002/drivers/thermal/fair_share.c
Michele Di Giorgio d0b7306d20 thermal: fix race condition when updating cooling device
When multiple thermal zones are bound to the same cooling device, multiple
kernel threads may want to update the cooling device state by calling
thermal_cdev_update(). Having cdev not protected by a mutex can lead to a race
condition. Consider the following situation with two kernel threads k1 and k2:

	    Thread k1				Thread k2
                                    ||
                                    ||  call thermal_cdev_update()
                                    ||      ...
                                    ||      set_cur_state(cdev, target);
    call power_actor_set_power()    ||
        ...                         ||
        instance->target = state;   ||
        cdev->updated = false;      ||
                                    ||      cdev->updated = true;
                                    ||      // completes execution
    call thermal_cdev_update()      ||
        // cdev->updated == true    ||
        return;                     ||
                                    \/
                                    time

k2 has already looped through the thermal instances looking for the deepest
cooling device state and is preempted right before setting cdev->updated to
true. Now, k1 runs, modifies the thermal instance state and sets cdev->updated
to false. Then, k1 is preempted and k2 continues the execution by setting
cdev->updated to true, therefore preventing k1 from performing the update.
Notice that this is not an issue if k2 looks at the instance->target modified by
k1 "after" it is assigned by k1. In fact, in this case the update will happen
anyway and k1 can safely return immediately from thermal_cdev_update().

This may lead to a situation where a thermal governor never updates the cooling
device. For example, this is the case for the step_wise governor: when calling
the function thermal_zone_trip_update(), the governor may always get a new state
equal to the old one (which, however, wasn't notified to the cooling device) and
will therefore skip the update.

CC: Zhang Rui <rui.zhang@intel.com>
CC: Eduardo Valentin <edubezval@gmail.com>
CC: Peter Feuerer <peter@piie.net>
Reported-by: Toby Huang <toby.huang@arm.com>
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
2016-08-08 10:57:39 +08:00

142 lines
4.0 KiB
C

/*
* fair_share.c - A simple weight based Thermal governor
*
* Copyright (C) 2012 Intel Corp
* Copyright (C) 2012 Durgadoss R <durgadoss.r@intel.com>
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <linux/thermal.h>
#include <trace/events/thermal.h>
#include "thermal_core.h"
/**
* get_trip_level: - obtains the current trip level for a zone
* @tz: thermal zone device
*/
static int get_trip_level(struct thermal_zone_device *tz)
{
int count = 0;
int trip_temp;
enum thermal_trip_type trip_type;
if (tz->trips == 0 || !tz->ops->get_trip_temp)
return 0;
for (count = 0; count < tz->trips; count++) {
tz->ops->get_trip_temp(tz, count, &trip_temp);
if (tz->temperature < trip_temp)
break;
}
/*
* count > 0 only if temperature is greater than first trip
* point, in which case, trip_point = count - 1
*/
if (count > 0) {
tz->ops->get_trip_type(tz, count - 1, &trip_type);
trace_thermal_zone_trip(tz, count - 1, trip_type);
}
return count;
}
static long get_target_state(struct thermal_zone_device *tz,
struct thermal_cooling_device *cdev, int percentage, int level)
{
unsigned long max_state;
cdev->ops->get_max_state(cdev, &max_state);
return (long)(percentage * level * max_state) / (100 * tz->trips);
}
/**
* fair_share_throttle - throttles devices associated with the given zone
* @tz - thermal_zone_device
*
* Throttling Logic: This uses three parameters to calculate the new
* throttle state of the cooling devices associated with the given zone.
*
* Parameters used for Throttling:
* P1. max_state: Maximum throttle state exposed by the cooling device.
* P2. percentage[i]/100:
* How 'effective' the 'i'th device is, in cooling the given zone.
* P3. cur_trip_level/max_no_of_trips:
* This describes the extent to which the devices should be throttled.
* We do not want to throttle too much when we trip a lower temperature,
* whereas the throttling is at full swing if we trip critical levels.
* (Heavily assumes the trip points are in ascending order)
* new_state of cooling device = P3 * P2 * P1
*/
static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
{
struct thermal_instance *instance;
int total_weight = 0;
int total_instance = 0;
int cur_trip_level = get_trip_level(tz);
list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
if (instance->trip != trip)
continue;
total_weight += instance->weight;
total_instance++;
}
list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
int percentage;
struct thermal_cooling_device *cdev = instance->cdev;
if (instance->trip != trip)
continue;
if (!total_weight)
percentage = 100 / total_instance;
else
percentage = (instance->weight * 100) / total_weight;
instance->target = get_target_state(tz, cdev, percentage,
cur_trip_level);
mutex_lock(&instance->cdev->lock);
instance->cdev->updated = false;
mutex_unlock(&instance->cdev->lock);
thermal_cdev_update(cdev);
}
return 0;
}
static struct thermal_governor thermal_gov_fair_share = {
.name = "fair_share",
.throttle = fair_share_throttle,
};
int thermal_gov_fair_share_register(void)
{
return thermal_register_governor(&thermal_gov_fair_share);
}
void thermal_gov_fair_share_unregister(void)
{
thermal_unregister_governor(&thermal_gov_fair_share);
}