linux_dsm_epyc7002/drivers/net/phy/mdio_bus.c

639 lines
15 KiB
C
Raw Normal View History

/* MDIO Bus interface
*
* Author: Andy Fleming
*
* Copyright (c) 2004 Freescale Semiconductor, Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/unistd.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/of_device.h>
#include <linux/of_mdio.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/phy.h>
#include <linux/io.h>
#include <linux/uaccess.h>
#include <asm/irq.h>
/**
* mdiobus_alloc_size - allocate a mii_bus structure
* @size: extra amount of memory to allocate for private storage.
* If non-zero, then bus->priv is points to that memory.
*
* Description: called by a bus driver to allocate an mii_bus
* structure to fill in.
*/
struct mii_bus *mdiobus_alloc_size(size_t size)
{
struct mii_bus *bus;
size_t aligned_size = ALIGN(sizeof(*bus), NETDEV_ALIGN);
size_t alloc_size;
/* If we alloc extra space, it should be aligned */
if (size)
alloc_size = aligned_size + size;
else
alloc_size = sizeof(*bus);
bus = kzalloc(alloc_size, GFP_KERNEL);
if (bus) {
bus->state = MDIOBUS_ALLOCATED;
if (size)
bus->priv = (void *)bus + aligned_size;
}
return bus;
}
EXPORT_SYMBOL(mdiobus_alloc_size);
static void _devm_mdiobus_free(struct device *dev, void *res)
{
mdiobus_free(*(struct mii_bus **)res);
}
static int devm_mdiobus_match(struct device *dev, void *res, void *data)
{
struct mii_bus **r = res;
if (WARN_ON(!r || !*r))
return 0;
return *r == data;
}
/**
* devm_mdiobus_alloc_size - Resource-managed mdiobus_alloc_size()
* @dev: Device to allocate mii_bus for
* @sizeof_priv: Space to allocate for private structure.
*
* Managed mdiobus_alloc_size. mii_bus allocated with this function is
* automatically freed on driver detach.
*
* If an mii_bus allocated with this function needs to be freed separately,
* devm_mdiobus_free() must be used.
*
* RETURNS:
* Pointer to allocated mii_bus on success, NULL on failure.
*/
struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv)
{
struct mii_bus **ptr, *bus;
ptr = devres_alloc(_devm_mdiobus_free, sizeof(*ptr), GFP_KERNEL);
if (!ptr)
return NULL;
/* use raw alloc_dr for kmalloc caller tracing */
bus = mdiobus_alloc_size(sizeof_priv);
if (bus) {
*ptr = bus;
devres_add(dev, ptr);
} else {
devres_free(ptr);
}
return bus;
}
EXPORT_SYMBOL_GPL(devm_mdiobus_alloc_size);
/**
* devm_mdiobus_free - Resource-managed mdiobus_free()
* @dev: Device this mii_bus belongs to
* @bus: the mii_bus associated with the device
*
* Free mii_bus allocated with devm_mdiobus_alloc_size().
*/
void devm_mdiobus_free(struct device *dev, struct mii_bus *bus)
{
int rc;
rc = devres_release(dev, _devm_mdiobus_free,
devm_mdiobus_match, bus);
WARN_ON(rc);
}
EXPORT_SYMBOL_GPL(devm_mdiobus_free);
/**
* mdiobus_release - mii_bus device release callback
* @d: the target struct device that contains the mii_bus
*
* Description: called when the last reference to an mii_bus is
* dropped, to free the underlying memory.
*/
static void mdiobus_release(struct device *d)
{
struct mii_bus *bus = to_mii_bus(d);
BUG_ON(bus->state != MDIOBUS_RELEASED &&
/* for compatibility with error handling in drivers */
bus->state != MDIOBUS_ALLOCATED);
kfree(bus);
}
static struct class mdio_bus_class = {
.name = "mdio_bus",
.dev_release = mdiobus_release,
};
#if IS_ENABLED(CONFIG_OF_MDIO)
/* Helper function for of_mdio_find_bus */
static int of_mdio_bus_match(struct device *dev, const void *mdio_bus_np)
{
return dev->of_node == mdio_bus_np;
}
/**
* of_mdio_find_bus - Given an mii_bus node, find the mii_bus.
* @mdio_bus_np: Pointer to the mii_bus.
*
* Returns a reference to the mii_bus, or NULL if none found. The
* embedded struct device will have its reference count incremented,
* and this must be put once the bus is finished with.
*
* Because the association of a device_node and mii_bus is made via
* of_mdiobus_register(), the mii_bus cannot be found before it is
* registered with of_mdiobus_register().
*
*/
struct mii_bus *of_mdio_find_bus(struct device_node *mdio_bus_np)
{
struct device *d;
if (!mdio_bus_np)
return NULL;
d = class_find_device(&mdio_bus_class, NULL, mdio_bus_np,
of_mdio_bus_match);
return d ? to_mii_bus(d) : NULL;
}
EXPORT_SYMBOL(of_mdio_find_bus);
/* Walk the list of subnodes of a mdio bus and look for a node that matches the
* phy's address with its 'reg' property. If found, set the of_node pointer for
* the phy. This allows auto-probed pyh devices to be supplied with information
* passed in via DT.
*/
static void of_mdiobus_link_phydev(struct mii_bus *mdio,
struct phy_device *phydev)
{
struct device *dev = &phydev->dev;
struct device_node *child;
if (dev->of_node || !mdio->dev.of_node)
return;
for_each_available_child_of_node(mdio->dev.of_node, child) {
int addr;
int ret;
ret = of_property_read_u32(child, "reg", &addr);
if (ret < 0) {
dev_err(dev, "%s has invalid PHY address\n",
child->full_name);
continue;
}
/* A PHY must have a reg property in the range [0-31] */
if (addr >= PHY_MAX_ADDR) {
dev_err(dev, "%s PHY address %i is too large\n",
child->full_name, addr);
continue;
}
if (addr == phydev->addr) {
dev->of_node = child;
return;
}
}
}
#else /* !IS_ENABLED(CONFIG_OF_MDIO) */
static inline void of_mdiobus_link_phydev(struct mii_bus *mdio,
struct phy_device *phydev)
{
}
#endif
/**
* __mdiobus_register - bring up all the PHYs on a given bus and attach them to bus
* @bus: target mii_bus
* @owner: module containing bus accessor functions
*
* Description: Called by a bus driver to bring up all the PHYs
* on a given bus, and attach them to the bus. Drivers should use
* mdiobus_register() rather than __mdiobus_register() unless they
* need to pass a specific owner module.
*
* Returns 0 on success or < 0 on error.
*/
int __mdiobus_register(struct mii_bus *bus, struct module *owner)
{
int i, err;
if (NULL == bus || NULL == bus->name ||
NULL == bus->read || NULL == bus->write)
return -EINVAL;
BUG_ON(bus->state != MDIOBUS_ALLOCATED &&
bus->state != MDIOBUS_UNREGISTERED);
bus->owner = owner;
bus->dev.parent = bus->parent;
bus->dev.class = &mdio_bus_class;
bus->dev.groups = NULL;
dev_set_name(&bus->dev, "%s", bus->id);
err = device_register(&bus->dev);
if (err) {
pr_err("mii_bus %s failed to register\n", bus->id);
put_device(&bus->dev);
return -EINVAL;
}
mutex_init(&bus->mdio_lock);
if (bus->reset)
bus->reset(bus);
for (i = 0; i < PHY_MAX_ADDR; i++) {
if ((bus->phy_mask & (1 << i)) == 0) {
struct phy_device *phydev;
phydev = mdiobus_scan(bus, i);
if (IS_ERR(phydev)) {
err = PTR_ERR(phydev);
goto error;
}
}
}
bus->state = MDIOBUS_REGISTERED;
pr_info("%s: probed\n", bus->name);
return 0;
error:
while (--i >= 0) {
struct phy_device *phydev = bus->phy_map[i];
if (phydev) {
phy_device_remove(phydev);
phy_device_free(phydev);
}
}
device_del(&bus->dev);
return err;
}
EXPORT_SYMBOL(__mdiobus_register);
void mdiobus_unregister(struct mii_bus *bus)
{
int i;
BUG_ON(bus->state != MDIOBUS_REGISTERED);
bus->state = MDIOBUS_UNREGISTERED;
for (i = 0; i < PHY_MAX_ADDR; i++) {
struct phy_device *phydev = bus->phy_map[i];
if (phydev) {
phy_device_remove(phydev);
phy_device_free(phydev);
}
}
phylib: fix device deletion order in mdiobus_unregister() commit 8b63ec1837fa ("phylib: Make PHYs children of their MDIO bus, not the bus' parent.") uncovered a problem in mdiobus_unregister() which leads to this warning when I reboot an APM Mustang (arm64) platform: WARNING: CPU: 7 PID: 4239 at fs/sysfs/group.c:224 sysfs_remove_group+0xa0/0xa4() sysfs group fffffe0000e07a10 not found for kobject 'xgene-mii-eth0:03' ... CPU: 7 PID: 4239 Comm: reboot Tainted: G E 4.2.0-0.18.el7.test15.aarch64 #1 Hardware name: AppliedMicro Mustang/Mustang, BIOS 1.1.0 Aug 26 2015 Call Trace: [<fffffe000009739c>] dump_backtrace+0x0/0x170 [<fffffe000009752c>] show_stack+0x20/0x2c [<fffffe00007436f0>] dump_stack+0x78/0x9c [<fffffe00000c2cb4>] warn_slowpath_common+0xa0/0xd8 [<fffffe00000c2d60>] warn_slowpath_fmt+0x74/0x88 [<fffffe0000293d3c>] sysfs_remove_group+0x9c/0xa4 [<fffffe00004a8bac>] dpm_sysfs_remove+0x5c/0x70 [<fffffe000049b388>] device_del+0x44/0x208 [<fffffe000049b578>] device_unregister+0x2c/0x7c [<fffffe000050dc68>] mdiobus_unregister+0x48/0x94 [<fffffe000052afd0>] xgene_enet_mdio_remove+0x28/0x44 [<fffffe000052d3f0>] xgene_enet_remove+0xd0/0xd8 [<fffffe000052d424>] xgene_enet_shutdown+0x2c/0x3c [<fffffe00004a204c>] platform_drv_shutdown+0x24/0x40 [<fffffe000049d4f4>] device_shutdown+0xf0/0x1b4 [<fffffe00000e31ec>] kernel_restart_prepare+0x40/0x4c [<fffffe00000e32f8>] kernel_restart+0x1c/0x80 [<fffffe00000e3670>] SyS_reboot+0x17c/0x250 The problem is that mdiobus_unregister() deletes the bus device before unregistering the phy devices on the bus. This wasn't a problem before because the phys were not children of the bus: /sys/devices/platform/APMC0D05:00/net/eth0/xgene-mii-eth0:03 /sys/devices/platform/APMC0D05:00/net/eth0/xgene-mii-eth0 But now that they are: /sys/devices/platform/APMC0D05:00/net/eth0/xgene-mii-eth0/xgene-mii-eth0:03 when mdiobus_unregister deletes the bus device, the phy subdirs are removed from sysfs also. So when the phys are unregistered afterward, we get the warning. This patch changes the order so that phys are unregistered before the bus device is deleted. Fixes: 8b63ec1837fa ("phylib: Make PHYs children of their MDIO bus, not the bus' parent.") Signed-off-by: Mark Salter <msalter@redhat.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Tested-by: Mark Langsdorf <mlangsdo@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-01 20:36:05 +07:00
device_del(&bus->dev);
}
EXPORT_SYMBOL(mdiobus_unregister);
/**
* mdiobus_free - free a struct mii_bus
* @bus: mii_bus to free
*
* This function releases the reference to the underlying device
* object in the mii_bus. If this is the last reference, the mii_bus
* will be freed.
*/
void mdiobus_free(struct mii_bus *bus)
{
/* For compatibility with error handling in drivers. */
if (bus->state == MDIOBUS_ALLOCATED) {
kfree(bus);
return;
}
BUG_ON(bus->state != MDIOBUS_UNREGISTERED);
bus->state = MDIOBUS_RELEASED;
put_device(&bus->dev);
}
EXPORT_SYMBOL(mdiobus_free);
struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr)
{
struct phy_device *phydev;
int err;
phydev = get_phy_device(bus, addr, false);
if (IS_ERR(phydev) || phydev == NULL)
return phydev;
/*
* For DT, see if the auto-probed phy has a correspoding child
* in the bus node, and set the of_node pointer in this case.
*/
of_mdiobus_link_phydev(bus, phydev);
err = phy_device_register(phydev);
if (err) {
phy_device_free(phydev);
return NULL;
}
return phydev;
}
EXPORT_SYMBOL(mdiobus_scan);
/**
* mdiobus_read - Convenience function for reading a given MII mgmt register
* @bus: the mii_bus struct
* @addr: the phy address
* @regnum: register number to read
*
* NOTE: MUST NOT be called from interrupt context,
* because the bus read/write functions may wait for an interrupt
* to conclude the operation.
*/
int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum)
{
int retval;
BUG_ON(in_interrupt());
mutex_lock(&bus->mdio_lock);
retval = bus->read(bus, addr, regnum);
mutex_unlock(&bus->mdio_lock);
return retval;
}
EXPORT_SYMBOL(mdiobus_read);
/**
* mdiobus_write - Convenience function for writing a given MII mgmt register
* @bus: the mii_bus struct
* @addr: the phy address
* @regnum: register number to write
* @val: value to write to @regnum
*
* NOTE: MUST NOT be called from interrupt context,
* because the bus read/write functions may wait for an interrupt
* to conclude the operation.
*/
int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val)
{
int err;
BUG_ON(in_interrupt());
mutex_lock(&bus->mdio_lock);
err = bus->write(bus, addr, regnum, val);
mutex_unlock(&bus->mdio_lock);
return err;
}
EXPORT_SYMBOL(mdiobus_write);
/**
* mdio_bus_match - determine if given PHY driver supports the given PHY device
* @dev: target PHY device
* @drv: given PHY driver
*
* Description: Given a PHY device, and a PHY driver, return 1 if
* the driver supports the device. Otherwise, return 0.
*/
static int mdio_bus_match(struct device *dev, struct device_driver *drv)
{
struct phy_device *phydev = to_phy_device(dev);
struct phy_driver *phydrv = to_phy_driver(drv);
const int num_ids = ARRAY_SIZE(phydev->c45_ids.device_ids);
int i;
if (of_driver_match_device(dev, drv))
return 1;
if (phydrv->match_phy_device)
return phydrv->match_phy_device(phydev);
if (phydev->is_c45) {
for (i = 1; i < num_ids; i++) {
if (!(phydev->c45_ids.devices_in_package & (1 << i)))
continue;
if ((phydrv->phy_id & phydrv->phy_id_mask) ==
(phydev->c45_ids.device_ids[i] &
phydrv->phy_id_mask))
return 1;
}
return 0;
} else {
return (phydrv->phy_id & phydrv->phy_id_mask) ==
(phydev->phy_id & phydrv->phy_id_mask);
}
}
#ifdef CONFIG_PM
static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
{
struct device_driver *drv = phydev->dev.driver;
struct phy_driver *phydrv = to_phy_driver(drv);
struct net_device *netdev = phydev->attached_dev;
if (!drv || !phydrv->suspend)
return false;
/* PHY not attached? May suspend if the PHY has not already been
* suspended as part of a prior call to phy_disconnect() ->
* phy_detach() -> phy_suspend() because the parent netdev might be the
* MDIO bus driver and clock gated at this point.
*/
if (!netdev)
return !phydev->suspended;
/* Don't suspend PHY if the attched netdev parent may wakeup.
* The parent may point to a PCI device, as in tg3 driver.
*/
if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent))
return false;
/* Also don't suspend PHY if the netdev itself may wakeup. This
* is the case for devices w/o underlaying pwr. mgmt. aware bus,
* e.g. SoC devices.
*/
if (device_may_wakeup(&netdev->dev))
return false;
return true;
}
static int mdio_bus_suspend(struct device *dev)
{
struct phy_device *phydev = to_phy_device(dev);
/* We must stop the state machine manually, otherwise it stops out of
phylib: Fix deadlock on resume Sometimes kernel hangs on resume with the following trace: ucc_geth e0102000.ucc: resume INFO: task bash:1764 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. bash D 0fecf43c 0 1764 1763 0x00000000 Call Trace: [cf9a7c10] [c0012868] ret_from_except+0x0/0x14 (unreliable) --- Exception: cf9a7ce0 at __switch_to+0x4c/0x6c LR = 0xcf9a7cc0 [cf9a7cd0] [c0008c14] __switch_to+0x4c/0x6c (unreliable) [cf9a7ce0] [c028bcfc] schedule+0x158/0x260 [cf9a7d10] [c028c720] __mutex_lock_slowpath+0x80/0xd8 [cf9a7d40] [c01cf388] phy_stop+0x20/0x70 [cf9a7d50] [c01d514c] ugeth_resume+0x6c/0x13c [...] Here is why. On suspend: - PM core starts suspending devices, ucc_geth_suspend gets called; - ucc_geth calls phy_stop() on suspend. Note that phy_stop() is mostly asynchronous so it doesn't block ucc_geth's suspend routine, it just sets PHY_HALTED state and disables PHY's interrupts; - Suddenly the state machine gets scheduled, it grabs the phydev->lock mutex and tries to process the PHY_HALTED state, so it calls phydev->adjust_link(phydev->attached_dev). In ucc_geth case adjust_link() calls msleep(), which reschedules the code flow back to PM core, which now finishes suspend and so we end up sleeping with phydev->lock mutex held. On resume: - PM core starts resuming devices (notice that nobody rescheduled the state machine yet, so the mutex is still held), the core calls ucc_geth's resume routine; - ucc_geth_resume restarts the PHY with phy_stop()/phy_start() sequence, and the phy_*() calls are trying to grab the phydev->lock mutex. Here comes the deadlock. This patch fixes the issue by stopping the state machine on suspend and starting it again on resume. Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-12-30 15:23:28 +07:00
* control, possibly with the phydev->lock held. Upon resume, netdev
* may call phy routines that try to grab the same lock, and that may
* lead to a deadlock.
*/
if (phydev->attached_dev && phydev->adjust_link)
phylib: Fix deadlock on resume Sometimes kernel hangs on resume with the following trace: ucc_geth e0102000.ucc: resume INFO: task bash:1764 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. bash D 0fecf43c 0 1764 1763 0x00000000 Call Trace: [cf9a7c10] [c0012868] ret_from_except+0x0/0x14 (unreliable) --- Exception: cf9a7ce0 at __switch_to+0x4c/0x6c LR = 0xcf9a7cc0 [cf9a7cd0] [c0008c14] __switch_to+0x4c/0x6c (unreliable) [cf9a7ce0] [c028bcfc] schedule+0x158/0x260 [cf9a7d10] [c028c720] __mutex_lock_slowpath+0x80/0xd8 [cf9a7d40] [c01cf388] phy_stop+0x20/0x70 [cf9a7d50] [c01d514c] ugeth_resume+0x6c/0x13c [...] Here is why. On suspend: - PM core starts suspending devices, ucc_geth_suspend gets called; - ucc_geth calls phy_stop() on suspend. Note that phy_stop() is mostly asynchronous so it doesn't block ucc_geth's suspend routine, it just sets PHY_HALTED state and disables PHY's interrupts; - Suddenly the state machine gets scheduled, it grabs the phydev->lock mutex and tries to process the PHY_HALTED state, so it calls phydev->adjust_link(phydev->attached_dev). In ucc_geth case adjust_link() calls msleep(), which reschedules the code flow back to PM core, which now finishes suspend and so we end up sleeping with phydev->lock mutex held. On resume: - PM core starts resuming devices (notice that nobody rescheduled the state machine yet, so the mutex is still held), the core calls ucc_geth's resume routine; - ucc_geth_resume restarts the PHY with phy_stop()/phy_start() sequence, and the phy_*() calls are trying to grab the phydev->lock mutex. Here comes the deadlock. This patch fixes the issue by stopping the state machine on suspend and starting it again on resume. Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-12-30 15:23:28 +07:00
phy_stop_machine(phydev);
if (!mdio_bus_phy_may_suspend(phydev))
return 0;
phylib: Fix deadlock on resume Sometimes kernel hangs on resume with the following trace: ucc_geth e0102000.ucc: resume INFO: task bash:1764 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. bash D 0fecf43c 0 1764 1763 0x00000000 Call Trace: [cf9a7c10] [c0012868] ret_from_except+0x0/0x14 (unreliable) --- Exception: cf9a7ce0 at __switch_to+0x4c/0x6c LR = 0xcf9a7cc0 [cf9a7cd0] [c0008c14] __switch_to+0x4c/0x6c (unreliable) [cf9a7ce0] [c028bcfc] schedule+0x158/0x260 [cf9a7d10] [c028c720] __mutex_lock_slowpath+0x80/0xd8 [cf9a7d40] [c01cf388] phy_stop+0x20/0x70 [cf9a7d50] [c01d514c] ugeth_resume+0x6c/0x13c [...] Here is why. On suspend: - PM core starts suspending devices, ucc_geth_suspend gets called; - ucc_geth calls phy_stop() on suspend. Note that phy_stop() is mostly asynchronous so it doesn't block ucc_geth's suspend routine, it just sets PHY_HALTED state and disables PHY's interrupts; - Suddenly the state machine gets scheduled, it grabs the phydev->lock mutex and tries to process the PHY_HALTED state, so it calls phydev->adjust_link(phydev->attached_dev). In ucc_geth case adjust_link() calls msleep(), which reschedules the code flow back to PM core, which now finishes suspend and so we end up sleeping with phydev->lock mutex held. On resume: - PM core starts resuming devices (notice that nobody rescheduled the state machine yet, so the mutex is still held), the core calls ucc_geth's resume routine; - ucc_geth_resume restarts the PHY with phy_stop()/phy_start() sequence, and the phy_*() calls are trying to grab the phydev->lock mutex. Here comes the deadlock. This patch fixes the issue by stopping the state machine on suspend and starting it again on resume. Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-12-30 15:23:28 +07:00
return phy_suspend(phydev);
}
static int mdio_bus_resume(struct device *dev)
{
struct phy_device *phydev = to_phy_device(dev);
phylib: Fix deadlock on resume Sometimes kernel hangs on resume with the following trace: ucc_geth e0102000.ucc: resume INFO: task bash:1764 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. bash D 0fecf43c 0 1764 1763 0x00000000 Call Trace: [cf9a7c10] [c0012868] ret_from_except+0x0/0x14 (unreliable) --- Exception: cf9a7ce0 at __switch_to+0x4c/0x6c LR = 0xcf9a7cc0 [cf9a7cd0] [c0008c14] __switch_to+0x4c/0x6c (unreliable) [cf9a7ce0] [c028bcfc] schedule+0x158/0x260 [cf9a7d10] [c028c720] __mutex_lock_slowpath+0x80/0xd8 [cf9a7d40] [c01cf388] phy_stop+0x20/0x70 [cf9a7d50] [c01d514c] ugeth_resume+0x6c/0x13c [...] Here is why. On suspend: - PM core starts suspending devices, ucc_geth_suspend gets called; - ucc_geth calls phy_stop() on suspend. Note that phy_stop() is mostly asynchronous so it doesn't block ucc_geth's suspend routine, it just sets PHY_HALTED state and disables PHY's interrupts; - Suddenly the state machine gets scheduled, it grabs the phydev->lock mutex and tries to process the PHY_HALTED state, so it calls phydev->adjust_link(phydev->attached_dev). In ucc_geth case adjust_link() calls msleep(), which reschedules the code flow back to PM core, which now finishes suspend and so we end up sleeping with phydev->lock mutex held. On resume: - PM core starts resuming devices (notice that nobody rescheduled the state machine yet, so the mutex is still held), the core calls ucc_geth's resume routine; - ucc_geth_resume restarts the PHY with phy_stop()/phy_start() sequence, and the phy_*() calls are trying to grab the phydev->lock mutex. Here comes the deadlock. This patch fixes the issue by stopping the state machine on suspend and starting it again on resume. Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-12-30 15:23:28 +07:00
int ret;
if (!mdio_bus_phy_may_suspend(phydev))
phylib: Fix deadlock on resume Sometimes kernel hangs on resume with the following trace: ucc_geth e0102000.ucc: resume INFO: task bash:1764 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. bash D 0fecf43c 0 1764 1763 0x00000000 Call Trace: [cf9a7c10] [c0012868] ret_from_except+0x0/0x14 (unreliable) --- Exception: cf9a7ce0 at __switch_to+0x4c/0x6c LR = 0xcf9a7cc0 [cf9a7cd0] [c0008c14] __switch_to+0x4c/0x6c (unreliable) [cf9a7ce0] [c028bcfc] schedule+0x158/0x260 [cf9a7d10] [c028c720] __mutex_lock_slowpath+0x80/0xd8 [cf9a7d40] [c01cf388] phy_stop+0x20/0x70 [cf9a7d50] [c01d514c] ugeth_resume+0x6c/0x13c [...] Here is why. On suspend: - PM core starts suspending devices, ucc_geth_suspend gets called; - ucc_geth calls phy_stop() on suspend. Note that phy_stop() is mostly asynchronous so it doesn't block ucc_geth's suspend routine, it just sets PHY_HALTED state and disables PHY's interrupts; - Suddenly the state machine gets scheduled, it grabs the phydev->lock mutex and tries to process the PHY_HALTED state, so it calls phydev->adjust_link(phydev->attached_dev). In ucc_geth case adjust_link() calls msleep(), which reschedules the code flow back to PM core, which now finishes suspend and so we end up sleeping with phydev->lock mutex held. On resume: - PM core starts resuming devices (notice that nobody rescheduled the state machine yet, so the mutex is still held), the core calls ucc_geth's resume routine; - ucc_geth_resume restarts the PHY with phy_stop()/phy_start() sequence, and the phy_*() calls are trying to grab the phydev->lock mutex. Here comes the deadlock. This patch fixes the issue by stopping the state machine on suspend and starting it again on resume. Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-12-30 15:23:28 +07:00
goto no_resume;
ret = phy_resume(phydev);
phylib: Fix deadlock on resume Sometimes kernel hangs on resume with the following trace: ucc_geth e0102000.ucc: resume INFO: task bash:1764 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. bash D 0fecf43c 0 1764 1763 0x00000000 Call Trace: [cf9a7c10] [c0012868] ret_from_except+0x0/0x14 (unreliable) --- Exception: cf9a7ce0 at __switch_to+0x4c/0x6c LR = 0xcf9a7cc0 [cf9a7cd0] [c0008c14] __switch_to+0x4c/0x6c (unreliable) [cf9a7ce0] [c028bcfc] schedule+0x158/0x260 [cf9a7d10] [c028c720] __mutex_lock_slowpath+0x80/0xd8 [cf9a7d40] [c01cf388] phy_stop+0x20/0x70 [cf9a7d50] [c01d514c] ugeth_resume+0x6c/0x13c [...] Here is why. On suspend: - PM core starts suspending devices, ucc_geth_suspend gets called; - ucc_geth calls phy_stop() on suspend. Note that phy_stop() is mostly asynchronous so it doesn't block ucc_geth's suspend routine, it just sets PHY_HALTED state and disables PHY's interrupts; - Suddenly the state machine gets scheduled, it grabs the phydev->lock mutex and tries to process the PHY_HALTED state, so it calls phydev->adjust_link(phydev->attached_dev). In ucc_geth case adjust_link() calls msleep(), which reschedules the code flow back to PM core, which now finishes suspend and so we end up sleeping with phydev->lock mutex held. On resume: - PM core starts resuming devices (notice that nobody rescheduled the state machine yet, so the mutex is still held), the core calls ucc_geth's resume routine; - ucc_geth_resume restarts the PHY with phy_stop()/phy_start() sequence, and the phy_*() calls are trying to grab the phydev->lock mutex. Here comes the deadlock. This patch fixes the issue by stopping the state machine on suspend and starting it again on resume. Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-12-30 15:23:28 +07:00
if (ret < 0)
return ret;
no_resume:
if (phydev->attached_dev && phydev->adjust_link)
phy_start_machine(phydev);
phylib: Fix deadlock on resume Sometimes kernel hangs on resume with the following trace: ucc_geth e0102000.ucc: resume INFO: task bash:1764 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. bash D 0fecf43c 0 1764 1763 0x00000000 Call Trace: [cf9a7c10] [c0012868] ret_from_except+0x0/0x14 (unreliable) --- Exception: cf9a7ce0 at __switch_to+0x4c/0x6c LR = 0xcf9a7cc0 [cf9a7cd0] [c0008c14] __switch_to+0x4c/0x6c (unreliable) [cf9a7ce0] [c028bcfc] schedule+0x158/0x260 [cf9a7d10] [c028c720] __mutex_lock_slowpath+0x80/0xd8 [cf9a7d40] [c01cf388] phy_stop+0x20/0x70 [cf9a7d50] [c01d514c] ugeth_resume+0x6c/0x13c [...] Here is why. On suspend: - PM core starts suspending devices, ucc_geth_suspend gets called; - ucc_geth calls phy_stop() on suspend. Note that phy_stop() is mostly asynchronous so it doesn't block ucc_geth's suspend routine, it just sets PHY_HALTED state and disables PHY's interrupts; - Suddenly the state machine gets scheduled, it grabs the phydev->lock mutex and tries to process the PHY_HALTED state, so it calls phydev->adjust_link(phydev->attached_dev). In ucc_geth case adjust_link() calls msleep(), which reschedules the code flow back to PM core, which now finishes suspend and so we end up sleeping with phydev->lock mutex held. On resume: - PM core starts resuming devices (notice that nobody rescheduled the state machine yet, so the mutex is still held), the core calls ucc_geth's resume routine; - ucc_geth_resume restarts the PHY with phy_stop()/phy_start() sequence, and the phy_*() calls are trying to grab the phydev->lock mutex. Here comes the deadlock. This patch fixes the issue by stopping the state machine on suspend and starting it again on resume. Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-12-30 15:23:28 +07:00
return 0;
}
static int mdio_bus_restore(struct device *dev)
{
struct phy_device *phydev = to_phy_device(dev);
struct net_device *netdev = phydev->attached_dev;
int ret;
if (!netdev)
return 0;
ret = phy_init_hw(phydev);
if (ret < 0)
return ret;
/* The PHY needs to renegotiate. */
phydev->link = 0;
phydev->state = PHY_UP;
phy_start_machine(phydev);
return 0;
}
static const struct dev_pm_ops mdio_bus_pm_ops = {
.suspend = mdio_bus_suspend,
.resume = mdio_bus_resume,
.freeze = mdio_bus_suspend,
.thaw = mdio_bus_resume,
.restore = mdio_bus_restore,
};
#define MDIO_BUS_PM_OPS (&mdio_bus_pm_ops)
#else
#define MDIO_BUS_PM_OPS NULL
#endif /* CONFIG_PM */
static ssize_t
phy_id_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct phy_device *phydev = to_phy_device(dev);
return sprintf(buf, "0x%.8lx\n", (unsigned long)phydev->phy_id);
}
static DEVICE_ATTR_RO(phy_id);
static ssize_t
phy_interface_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct phy_device *phydev = to_phy_device(dev);
const char *mode = NULL;
if (phy_is_internal(phydev))
mode = "internal";
else
mode = phy_modes(phydev->interface);
return sprintf(buf, "%s\n", mode);
}
static DEVICE_ATTR_RO(phy_interface);
static ssize_t
phy_has_fixups_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct phy_device *phydev = to_phy_device(dev);
return sprintf(buf, "%d\n", phydev->has_fixups);
}
static DEVICE_ATTR_RO(phy_has_fixups);
static struct attribute *mdio_dev_attrs[] = {
&dev_attr_phy_id.attr,
&dev_attr_phy_interface.attr,
&dev_attr_phy_has_fixups.attr,
NULL,
};
ATTRIBUTE_GROUPS(mdio_dev);
struct bus_type mdio_bus_type = {
.name = "mdio_bus",
.match = mdio_bus_match,
.pm = MDIO_BUS_PM_OPS,
.dev_groups = mdio_dev_groups,
};
EXPORT_SYMBOL(mdio_bus_type);
int __init mdio_bus_init(void)
{
int ret;
ret = class_register(&mdio_bus_class);
if (!ret) {
ret = bus_register(&mdio_bus_type);
if (ret)
class_unregister(&mdio_bus_class);
}
return ret;
}
void mdio_bus_exit(void)
{
class_unregister(&mdio_bus_class);
bus_unregister(&mdio_bus_type);
}