linux_dsm_epyc7002/drivers/base/power/opp/opp.h
Viresh Kumar 87b4115db0 PM / OPP: Protect updates to list_dev with mutex
dev_opp_list_lock is used everywhere to protect device and OPP lists,
but dev_pm_opp_set_sharing_cpus() is missed somehow. And instead we used
rcu-lock, which wouldn't help here as we are adding a new list_dev.

This also fixes a problem where we have called kzalloc(..., GFP_KERNEL)
from within rcu-lock, which isn't allowed as kzalloc can sleep when
called with GFP_KERNEL.

With CONFIG_DEBUG_ATOMIC_SLEEP set, we get the following lockdep splat:

include/linux/rcupdate.h:578 Illegal context switch in RCU read-side critical section!

other info that might help us debug this:

rcu_scheduler_active = 1, debug_locks = 0
5 locks held by swapper/0/1:
 #0:  (&dev->mutex){......}, at: [<c02f68f4>] __driver_attach+0x48/0x98
 #1:  (&dev->mutex){......}, at: [<c02f6904>] __driver_attach+0x58/0x98
 #2:  (cpu_hotplug.lock){++++++}, at: [<c00249d0>] get_online_cpus+0x40/0xb0
 #3:  (subsys mutex#5){+.+.+.}, at: [<c02f4f8c>] subsys_interface_register+0x44/0xdc
 #4:  (rcu_read_lock){......}, at: [<c0305c80>] dev_pm_opp_set_sharing_cpus+0x0/0x1e4

stack backtrace:
CPU: 1 PID: 1 Comm: swapper/0 Tainted: G        W       4.3.0-rc7-00047-g81f5932958a8 #59
Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
[<c0016874>] (unwind_backtrace) from [<c001355c>] (show_stack+0x10/0x14)
[<c001355c>] (show_stack) from [<c022553c>] (dump_stack+0x94/0xbc)
[<c022553c>] (dump_stack) from [<c004904c>] (___might_sleep+0x24c/0x298)
[<c004904c>] (___might_sleep) from [<c00f07e4>] (kmem_cache_alloc+0xe8/0x164)
[<c00f07e4>] (kmem_cache_alloc) from [<c0305354>] (_add_list_dev+0x30/0x58)
[<c0305354>] (_add_list_dev) from [<c0305d50>] (dev_pm_opp_set_sharing_cpus+0xd0/0x1e4)
[<c0305d50>] (dev_pm_opp_set_sharing_cpus) from [<c040eda4>] (cpufreq_init+0x4cc/0x62c)
[<c040eda4>] (cpufreq_init) from [<c040a964>] (cpufreq_online+0xbc/0x73c)
[<c040a964>] (cpufreq_online) from [<c02f4fe0>] (subsys_interface_register+0x98/0xdc)
[<c02f4fe0>] (subsys_interface_register) from [<c040a640>] (cpufreq_register_driver+0x110/0x17c)
[<c040a640>] (cpufreq_register_driver) from [<c040ef64>] (dt_cpufreq_probe+0x60/0x8c)
[<c040ef64>] (dt_cpufreq_probe) from [<c02f8084>] (platform_drv_probe+0x44/0xa4)
[<c02f8084>] (platform_drv_probe) from [<c02f67c0>] (driver_probe_device+0x208/0x2f4)
[<c02f67c0>] (driver_probe_device) from [<c02f6940>] (__driver_attach+0x94/0x98)
[<c02f6940>] (__driver_attach) from [<c02f4c1c>] (bus_for_each_dev+0x68/0x9c)

Reported-by: Michael Turquette <mturquette@baylibre.com>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Cc: 4.3 <stable@vger.kernel.org> # 4.3
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2015-11-06 23:59:41 +01:00

147 lines
4.7 KiB
C

/*
* Generic OPP Interface
*
* Copyright (C) 2009-2010 Texas Instruments Incorporated.
* Nishanth Menon
* Romit Dasgupta
* Kevin Hilman
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef __DRIVER_OPP_H__
#define __DRIVER_OPP_H__
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/pm_opp.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
/*
* Lock to allow exclusive modification to the device and opp lists.
*
* Writers take this mutex before adding to or modifying the lists; readers
* traverse the opp list under RCU (see the RCU usage notes on the structures
* declared in this header). The mutex itself is defined outside this header;
* only the declaration lives here.
*/
extern struct mutex dev_opp_list_lock;
/*
* Internal data structure organization with the OPP layer library is as
* follows:
*
*   dev_opp_list (root)
*    |- device 1 (represents voltage domain 1)
*    |   |- opp 1 (availability, freq, voltage)
*    |   |- opp 2 ..
*    ... ...
*    |   `- opp n ..
*    |- device 2 (represents the next voltage domain)
*    ...
*    `- device m (represents mth voltage domain)
*
* device 1, 2.. are each represented by a struct device_opp, while each opp
* is represented by a struct dev_pm_opp.
*/
/**
* struct dev_pm_opp - Generic OPP description structure
* @node: opp list node. The nodes are maintained throughout the lifetime
* of boot. It is expected only an optimal set of OPPs are
* added to the library by the SoC framework.
* RCU usage: opp list is traversed with RCU locks. node
* modification is possible realtime, hence the modifications
* are protected by the dev_opp_list_lock for integrity.
* IMPORTANT: the opp nodes should be maintained in increasing
* order.
* @available: true if this OPP is marked as available, false otherwise
* @dynamic: true if this OPP was not created from static DT entries
* @turbo: true if turbo (boost) OPP
* @rate: Frequency in hertz
* @u_volt: Target voltage in microvolts corresponding to this OPP
* @u_volt_min: Minimum voltage in microvolts corresponding to this OPP
* @u_volt_max: Maximum voltage in microvolts corresponding to this OPP
* @u_amp: Maximum current drawn by the device in microamperes
* @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's
* frequency from any other OPP's frequency.
* @dev_opp: points back to the device_opp struct this opp belongs to
* @rcu_head: RCU callback head used for deferred freeing
* @np: OPP's device node.
*
* This structure stores the OPP information for a given device.
*/
struct dev_pm_opp {
struct list_head node;
bool available;
bool dynamic;
bool turbo;
unsigned long rate;
unsigned long u_volt;
unsigned long u_volt_min;
unsigned long u_volt_max;
unsigned long u_amp;
unsigned long clock_latency_ns;
struct device_opp *dev_opp;
struct rcu_head rcu_head;
struct device_node *np;
};
/**
* struct device_list_opp - devices managed by 'struct device_opp'
* @node: list node linking this entry into a list of devices
* (presumably the dev_list of the owning struct device_opp - confirm
* against the opp core)
* @dev: device that this list entry represents
* @rcu_head: RCU callback head used for deferred freeing
*
* This is an internal data structure maintaining the list of devices that are
* managed by 'struct device_opp'.
*/
struct device_list_opp {
struct list_head node;
const struct device *dev;
struct rcu_head rcu_head;
};
/**
* struct device_opp - Device opp structure
* @node: list node - contains the devices with OPPs that
* have been registered. Nodes once added are not modified in this
* list.
* RCU usage: nodes are not modified in the list of device_opp,
* however addition is possible and is secured by dev_opp_list_lock
* @srcu_head: notifier head to notify the OPP availability changes.
* @rcu_head: RCU callback head used for deferred freeing
* @dev_list: list of devices that share these OPPs
* @opp_list: list of opps
* @np: struct device_node pointer for opp's DT node.
* @clock_latency_ns_max: NOTE(review): not described by the original
* documentation; presumably the largest clock_latency_ns among the
* OPPs on @opp_list - confirm against the opp core.
* @shared_opp: OPP is shared between multiple devices.
* @suspend_opp: NOTE(review): not described by the original documentation;
* judging by the name, the OPP associated with device suspend -
* confirm against the opp core.
*
* This is an internal data structure maintaining the link to opps attached to
* a device. This structure is not meant to be shared to users as it is
* meant for book keeping and private to OPP library.
*
* Because the opp structures can be used from both rcu and srcu readers, we
* need to wait for the grace period of both of them before freeing any
* resources. And so we have used kfree_rcu() from within call_srcu() handlers.
*/
struct device_opp {
struct list_head node;
struct srcu_notifier_head srcu_head;
struct rcu_head rcu_head;
struct list_head dev_list;
struct list_head opp_list;
struct device_node *np;
unsigned long clock_latency_ns_max;
bool shared_opp;
struct dev_pm_opp *suspend_opp;
};
/*
* Routines internal to opp core.
*
* Only the prototypes are visible in this header; the implementations live
* elsewhere in the OPP library. The notes below describe what the signatures
* show and mark everything else as an assumption to be verified.
*/
/*
* _find_device_opp() - takes a device and returns a struct device_opp pointer.
* NOTE(review): presumably looks up the device_opp that manages @dev. The
* failure convention (NULL vs. ERR_PTR) and the locking/RCU context required
* of the caller are not visible here - confirm in the implementation.
*/
struct device_opp *_find_device_opp(struct device *dev);
/*
* _add_list_dev() - takes a device and a device_opp and returns a
* struct device_list_opp pointer.
* NOTE(review): presumably creates a list entry for @dev and attaches it to
* @dev_opp. If the implementation allocates memory with a sleeping allocation,
* it must not be called from an RCU read-side critical section - confirm, and
* confirm whether the caller is expected to hold dev_opp_list_lock.
*/
struct device_list_opp *_add_list_dev(const struct device *dev,
struct device_opp *dev_opp);
/*
* _of_get_opp_desc_node() - takes a device and returns a struct device_node
* pointer.
* NOTE(review): presumably returns the device-tree node describing @dev's
* OPPs; whether the caller owns a reference on the returned node is not
* visible here - confirm in the implementation.
*/
struct device_node *_of_get_opp_desc_node(struct device *dev);
#endif /* __DRIVER_OPP_H__ */