From e9e7e85d75f3731079ffd77c1a66f037aef04fe7 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 15 Jul 2019 12:00:20 +0200 Subject: [PATCH 001/169] xfrm interface: avoid corruption on changelink The new parameters must not be stored in the netdev_priv() before validation, it may corrupt the interface. Note also that if data is NULL, only a memset() is done. $ ip link add xfrm1 type xfrm dev lo if_id 1 $ ip link add xfrm2 type xfrm dev lo if_id 2 $ ip link set xfrm1 type xfrm dev lo if_id 2 RTNETLINK answers: File exists $ ip -d link list dev xfrm1 5: xfrm1@lo: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/none 00:00:00:00:00:00 brd 00:00:00:00:00:00 promiscuity 0 minmtu 68 maxmtu 1500 xfrm if_id 0x2 addrgenmode eui64 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535 => "if_id 0x2" Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Signed-off-by: Nicolas Dichtel Tested-by: Julien Floret Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 74868f9d81fb..2310dc9e35c2 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -671,12 +671,12 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct xfrm_if *xi = netdev_priv(dev); struct net *net = dev_net(dev); + struct xfrm_if_parms p; + struct xfrm_if *xi; - xfrmi_netlink_parms(data, &xi->p); - - xi = xfrmi_locate(net, &xi->p); + xfrmi_netlink_parms(data, &p); + xi = xfrmi_locate(net, &p); if (!xi) { xi = netdev_priv(dev); } else { @@ -684,7 +684,7 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], return -EEXIST; } - return xfrmi_update(xi, &xi->p); + return xfrmi_update(xi, &p); } static size_t xfrmi_get_size(const struct net_device *dev) From e0aaa332e6a97dae57ad59cdb19e21f83c3d081c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 15 Jul 2019 12:00:21 +0200 Subject: [PATCH 002/169] xfrm interface: ifname may be wrong in logs The ifname is copied when the interface is created, but is never updated later. In fact, this property is used only in one error message, where the netdevice pointer is available, thus let's use it. Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - net/xfrm/xfrm_interface.c | 10 +--------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b22db30c3d88..ad761ef84797 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -983,7 +983,6 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); struct xfrm_if_parms { - char name[IFNAMSIZ]; /* name of XFRM device */ int link; /* ifindex of underlying L2 interface */ u32 if_id; /* interface identifyer */ }; diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 2310dc9e35c2..68336ee00d72 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -145,8 +145,6 @@ static int xfrmi_create(struct net_device *dev) if (err < 0) goto out; - strcpy(xi->p.name, dev->name); - dev_hold(dev); xfrmi_link(xfrmn, xi); @@ -294,7 +292,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (tdev == dev) { stats->collisions++; net_warn_ratelimited("%s: Local routing loop detected!\n", - xi->p.name); + dev->name); goto tx_err_dst_release; } @@ -638,12 +636,6 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, int err; xfrmi_netlink_parms(data, &p); - - if (!tb[IFLA_IFNAME]) - return -EINVAL; - - nla_strlcpy(p.name, tb[IFLA_IFNAME], IFNAMSIZ); - xi = xfrmi_locate(net, &p); if (xi) return -EEXIST; From c5d1030f23002430c2a336b2b629b9d6f72b3564 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 15 Jul 2019 12:00:22 +0200 Subject: [PATCH 003/169] xfrm interface: fix list corruption for x-netns dev_net(dev) is the netns of the device and xi->net is the link netns, where the device has been linked. changelink() must operate in the link netns to avoid a corruption of the xfrm lists. Note that xi->net and dev_net(xi->physdev) are always the same. Before the patch, the xfrmi lists may be corrupted and can later trigger a kernel panic. Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Reported-by: Julien Floret Signed-off-by: Nicolas Dichtel Tested-by: Julien Floret Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 68336ee00d72..53e5e47b2c55 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -503,7 +503,7 @@ static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p) static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p) { - struct net *net = dev_net(xi->dev); + struct net *net = xi->net; struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); int err; @@ -663,9 +663,9 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct net *net = dev_net(dev); + struct xfrm_if *xi = netdev_priv(dev); + struct net *net = xi->net; struct xfrm_if_parms p; - struct xfrm_if *xi; xfrmi_netlink_parms(data, &p); xi = xfrmi_locate(net, &p); @@ -707,7 +707,7 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - return dev_net(xi->phydev); + return xi->net; } static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = { From 22d6552f827ef76ade3edf6bbb3f05048a0a7d8b Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 15 Jul 2019 12:00:23 +0200 Subject: [PATCH 004/169] xfrm interface: fix management of phydev With the current implementation, phydev cannot be removed: $ ip link add dummy type dummy $ ip link add xfrm1 type xfrm dev dummy if_id 1 $ ip l d dummy kernel:[77938.465445] unregister_netdevice: waiting for dummy to become free. Usage count = 1 Manage it like in ip tunnels, ie just keep the ifindex. Not that the side effect, is that the phydev is now optional. Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Signed-off-by: Nicolas Dichtel Tested-by: Julien Floret Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 - net/xfrm/xfrm_interface.c | 32 +++++++++++++++++--------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ad761ef84797..aa08a7a5f6ac 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -990,7 +990,6 @@ struct xfrm_if_parms { struct xfrm_if { struct xfrm_if __rcu *next; /* next interface in list */ struct net_device *dev; /* virtual device associated with interface */ - struct net_device *phydev; /* physical device */ struct net *net; /* netns for packet i/o */ struct xfrm_if_parms p; /* interface parms */ diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 53e5e47b2c55..2ab4859df55a 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -175,7 +175,6 @@ static void xfrmi_dev_uninit(struct net_device *dev) struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id); xfrmi_unlink(xfrmn, xi); - dev_put(xi->phydev); dev_put(dev); } @@ -362,7 +361,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err; } - fl.flowi_oif = xi->phydev->ifindex; + fl.flowi_oif = xi->p.link; ret = xfrmi_xmit2(skb, dev, &fl); if (ret < 0) @@ -548,7 +547,7 @@ static int xfrmi_get_iflink(const struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - return xi->phydev->ifindex; + return xi->p.link; } @@ -574,12 +573,14 @@ static void xfrmi_dev_setup(struct net_device *dev) dev->needs_free_netdev = true; dev->priv_destructor = xfrmi_dev_free; netif_keep_dst(dev); + + eth_broadcast_addr(dev->broadcast); } static int xfrmi_dev_init(struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - struct net_device *phydev = xi->phydev; + struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link); int err; dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); @@ -594,13 +595,19 @@ static int xfrmi_dev_init(struct net_device *dev) dev->features |= NETIF_F_LLTX; - dev->needed_headroom = phydev->needed_headroom; - dev->needed_tailroom = phydev->needed_tailroom; + if (phydev) { + dev->needed_headroom = phydev->needed_headroom; + dev->needed_tailroom = phydev->needed_tailroom; - if (is_zero_ether_addr(dev->dev_addr)) - eth_hw_addr_inherit(dev, phydev); - if (is_zero_ether_addr(dev->broadcast)) - memcpy(dev->broadcast, phydev->broadcast, dev->addr_len); + if (is_zero_ether_addr(dev->dev_addr)) + eth_hw_addr_inherit(dev, phydev); + if (is_zero_ether_addr(dev->broadcast)) + memcpy(dev->broadcast, phydev->broadcast, + dev->addr_len); + } else { + eth_hw_addr_random(dev); + eth_broadcast_addr(dev->broadcast); + } return 0; } @@ -644,13 +651,8 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, xi->p = p; xi->net = net; xi->dev = dev; - xi->phydev = dev_get_by_index(net, p.link); - if (!xi->phydev) - return -ENODEV; err = xfrmi_create(dev); - if (err < 0) - dev_put(xi->phydev); return err; } From 7352e72a513fd2757b2fda695a349d86faa4c94e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Aug 2019 13:33:35 +0300 Subject: [PATCH 005/169] regulator: slg51000: Fix a couple NULL vs IS_ERR() checks The devm_gpiod_get_from_of_node() function never returns NULL, it returns error pointers on error. Fixes: a867bde3dd03 ("regulator: slg51000: add slg51000 regulator driver") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20190808103335.GD30506@mwanda Signed-off-by: Mark Brown --- drivers/regulator/slg51000-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/slg51000-regulator.c b/drivers/regulator/slg51000-regulator.c index 04b732991d69..4d859fef55e6 100644 --- a/drivers/regulator/slg51000-regulator.c +++ b/drivers/regulator/slg51000-regulator.c @@ -205,7 +205,7 @@ static int slg51000_of_parse_cb(struct device_node *np, ena_gpiod = devm_gpiod_get_from_of_node(chip->dev, np, "enable-gpios", 0, gflags, "gpio-en-ldo"); - if (ena_gpiod) { + if (!IS_ERR(ena_gpiod)) { config->ena_gpiod = ena_gpiod; devm_gpiod_unhinge(chip->dev, config->ena_gpiod); } @@ -459,7 +459,7 @@ static int slg51000_i2c_probe(struct i2c_client *client, GPIOD_OUT_HIGH | GPIOD_FLAGS_BIT_NONEXCLUSIVE, "slg51000-cs"); - if (cs_gpiod) { + if (!IS_ERR(cs_gpiod)) { dev_info(dev, "Found chip selector property\n"); chip->cs_gpiod = cs_gpiod; } From 45f5d5a9e34d3fe4140a9a3b5f7ebe86c252440a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 31 Jul 2019 09:48:01 +0200 Subject: [PATCH 006/169] arm64: dts: renesas: r8a77995: draak: Fix backlight regulator name Currently there are two nodes named "regulator1" in the Draak DTS: a 3.3V regulator for the eMMC and the LVDS decoder, and a 12V regulator for the backlight. This causes the former to be overwritten by the latter. Fix this by renaming all regulators with numerical suffixes to use named suffixes, which are less likely to conflict. Fixes: 4fbd4158fe8967e9 ("arm64: dts: renesas: r8a77995: draak: Add backlight") Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/r8a77995-draak.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r8a77995-draak.dts b/arch/arm64/boot/dts/renesas/r8a77995-draak.dts index 0711170b26b1..3aa2564dfdc2 100644 --- a/arch/arm64/boot/dts/renesas/r8a77995-draak.dts +++ b/arch/arm64/boot/dts/renesas/r8a77995-draak.dts @@ -97,7 +97,7 @@ memory@48000000 { reg = <0x0 0x48000000 0x0 0x18000000>; }; - reg_1p8v: regulator0 { + reg_1p8v: regulator-1p8v { compatible = "regulator-fixed"; regulator-name = "fixed-1.8V"; regulator-min-microvolt = <1800000>; @@ -106,7 +106,7 @@ reg_1p8v: regulator0 { regulator-always-on; }; - reg_3p3v: regulator1 { + reg_3p3v: regulator-3p3v { compatible = "regulator-fixed"; regulator-name = "fixed-3.3V"; regulator-min-microvolt = <3300000>; @@ -115,7 +115,7 @@ reg_3p3v: regulator1 { regulator-always-on; }; - reg_12p0v: regulator1 { + reg_12p0v: regulator-12p0v { compatible = "regulator-fixed"; regulator-name = "D12.0V"; regulator-min-microvolt = <12000000>; From b9ee5e04fd77898208c51b1395fa0b5e8536f9b6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 8 Aug 2019 12:48:26 +0000 Subject: [PATCH 007/169] powerpc/64e: Drop stale call to smp_processor_id() which hangs SMP startup Commit ebb9d30a6a74 ("powerpc/mm: any thread in one core can be the first to setup TLB1") removed the need to know the cpu_id in early_init_this_mmu(), but the call to smp_processor_id() which was marked __maybe_used remained. Since commit ed1cd6deb013 ("powerpc: Activate CONFIG_THREAD_INFO_IN_TASK") thread_info cannot be reached before MMU is properly set up. Drop this stale call to smp_processor_id() which makes SMP hang when CONFIG_PREEMPT is set. Fixes: ebb9d30a6a74 ("powerpc/mm: any thread in one core can be the first to setup TLB1") Fixes: ed1cd6deb013 ("powerpc: Activate CONFIG_THREAD_INFO_IN_TASK") Cc: stable@vger.kernel.org # v5.1+ Reported-by: Chris Packham Signed-off-by: Christophe Leroy Tested-by: Chris Packham Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bef479514f4c08329fa649f67735df8918bc0976.1565268248.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/nohash/tlb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index d4acf6fa0596..bf60983a58c7 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -630,7 +630,6 @@ static void early_init_this_mmu(void) #ifdef CONFIG_PPC_FSL_BOOK3E if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { unsigned int num_cams; - int __maybe_unused cpu = smp_processor_id(); bool map = true; /* use a quarter of the TLBCAM for bolted linear map */ From 5d2fc542e8c92130b55ddeb81876ec398adf4d13 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Tue, 13 Aug 2019 00:09:54 +0530 Subject: [PATCH 008/169] regulator: act8945a-regulator: fix ldo register addresses in set_mode hook According to ACT8945A datasheet[1], operating modes for ldos are controlled by BIT(5) of their respective _CTRL registers. [1] https://active-semi.com/wp-content/uploads/ACT8945A_Datasheet.pdf Fixes: 7482d6ecc68e ("regulator: act8945a-regulator: Implement PM functionalities") Signed-off-by: Raag Jadav Link: https://lore.kernel.org/r/1565635194-5816-1-git-send-email-raagjadav@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/act8945a-regulator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/regulator/act8945a-regulator.c b/drivers/regulator/act8945a-regulator.c index 584284938ac9..d2f804dbc785 100644 --- a/drivers/regulator/act8945a-regulator.c +++ b/drivers/regulator/act8945a-regulator.c @@ -169,16 +169,16 @@ static int act8945a_set_mode(struct regulator_dev *rdev, unsigned int mode) reg = ACT8945A_DCDC3_CTRL; break; case ACT8945A_ID_LDO1: - reg = ACT8945A_LDO1_SUS; + reg = ACT8945A_LDO1_CTRL; break; case ACT8945A_ID_LDO2: - reg = ACT8945A_LDO2_SUS; + reg = ACT8945A_LDO2_CTRL; break; case ACT8945A_ID_LDO3: - reg = ACT8945A_LDO3_SUS; + reg = ACT8945A_LDO3_CTRL; break; case ACT8945A_ID_LDO4: - reg = ACT8945A_LDO4_SUS; + reg = ACT8945A_LDO4_CTRL; break; default: return -EINVAL; From 3829100a63724f6dbf264b2a7f06e7f638ed952d Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Wed, 14 Aug 2019 23:43:19 +0200 Subject: [PATCH 009/169] regulator: twl: voltage lists for vdd1/2 on twl4030 _opp_supported_by_regulators() wrongly ignored errors from regulator_is_supported_voltage(), so it considered errors as success. Since commit 498209445124 ("regulator: core: simplify return value on suported_voltage") regulator_is_supported_voltage() returns a real boolean, so errors make _opp_supported_by_regulators() return false. That reveals a problem with the declaration of the VDD1/2 regulators on twl4030. The VDD1/VDD2 regulators on twl4030 are neither defined with voltage lists nor with the continuous flag set, so regulator_is_supported_voltage() returns false and an error before above mentioned commit (which was considered success) The result is that after the above mentioned commit cpufreq does not work properly e.g. dm3730. [ 2.490997] core: _opp_supported_by_regulators: OPP minuV: 1012500 maxuV: 1012500, not supported by regulator [ 2.501617] cpu cpu0: _opp_add: OPP not supported by regulators (300000000) [ 2.509246] core: _opp_supported_by_regulators: OPP minuV: 1200000 maxuV: 1200000, not supported by regulator [ 2.519775] cpu cpu0: _opp_add: OPP not supported by regulators (600000000) [ 2.527313] core: _opp_supported_by_regulators: OPP minuV: 1325000 maxuV: 1325000, not supported by regulator [ 2.537750] cpu cpu0: _opp_add: OPP not supported by regulators (800000000) The patch fixes declaration of VDD1/2 regulators by adding proper voltage lists. Fixes: 498209445124 ("regulator: core: simplify return value on suported_voltage") Cc: stable@vger.kernel.org Signed-off-by: Andreas Kemnade Tested-by: Adam Ford #logicpd-torpedo-37xx-devkit Link: https://lore.kernel.org/r/20190814214319.24087-1-andreas@kemnade.info Signed-off-by: Mark Brown --- drivers/regulator/twl-regulator.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c index 6fa15b2d6fb3..866b4dd01da9 100644 --- a/drivers/regulator/twl-regulator.c +++ b/drivers/regulator/twl-regulator.c @@ -359,6 +359,17 @@ static const u16 VINTANA2_VSEL_table[] = { 2500, 2750, }; +/* 600mV to 1450mV in 12.5 mV steps */ +static const struct regulator_linear_range VDD1_ranges[] = { + REGULATOR_LINEAR_RANGE(600000, 0, 68, 12500) +}; + +/* 600mV to 1450mV in 12.5 mV steps, everything above = 1500mV */ +static const struct regulator_linear_range VDD2_ranges[] = { + REGULATOR_LINEAR_RANGE(600000, 0, 68, 12500), + REGULATOR_LINEAR_RANGE(1500000, 69, 69, 12500) +}; + static int twl4030ldo_list_voltage(struct regulator_dev *rdev, unsigned index) { struct twlreg_info *info = rdev_get_drvdata(rdev); @@ -427,6 +438,8 @@ static int twl4030smps_get_voltage(struct regulator_dev *rdev) } static const struct regulator_ops twl4030smps_ops = { + .list_voltage = regulator_list_voltage_linear_range, + .set_voltage = twl4030smps_set_voltage, .get_voltage = twl4030smps_get_voltage, }; @@ -466,7 +479,8 @@ static const struct twlreg_info TWL4030_INFO_##label = { \ }, \ } -#define TWL4030_ADJUSTABLE_SMPS(label, offset, num, turnon_delay, remap_conf) \ +#define TWL4030_ADJUSTABLE_SMPS(label, offset, num, turnon_delay, remap_conf, \ + n_volt) \ static const struct twlreg_info TWL4030_INFO_##label = { \ .base = offset, \ .id = num, \ @@ -479,6 +493,9 @@ static const struct twlreg_info TWL4030_INFO_##label = { \ .owner = THIS_MODULE, \ .enable_time = turnon_delay, \ .of_map_mode = twl4030reg_map_mode, \ + .n_voltages = n_volt, \ + .n_linear_ranges = ARRAY_SIZE(label ## _ranges), \ + .linear_ranges = label ## _ranges, \ }, \ } @@ -518,8 +535,8 @@ TWL4030_ADJUSTABLE_LDO(VSIM, 0x37, 9, 100, 0x00); TWL4030_ADJUSTABLE_LDO(VDAC, 0x3b, 10, 100, 0x08); TWL4030_ADJUSTABLE_LDO(VINTANA2, 0x43, 12, 100, 0x08); TWL4030_ADJUSTABLE_LDO(VIO, 0x4b, 14, 1000, 0x08); -TWL4030_ADJUSTABLE_SMPS(VDD1, 0x55, 15, 1000, 0x08); -TWL4030_ADJUSTABLE_SMPS(VDD2, 0x63, 16, 1000, 0x08); +TWL4030_ADJUSTABLE_SMPS(VDD1, 0x55, 15, 1000, 0x08, 68); +TWL4030_ADJUSTABLE_SMPS(VDD2, 0x63, 16, 1000, 0x08, 69); /* VUSBCP is managed *only* by the USB subchip */ TWL4030_FIXED_LDO(VINTANA1, 0x3f, 1500, 11, 100, 0x08); TWL4030_FIXED_LDO(VINTDIG, 0x47, 1500, 13, 100, 0x08); From 769a807d0b41df4201dbeb01c22eaeb3e5905532 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 12 Aug 2019 10:32:13 +0200 Subject: [PATCH 010/169] xfrm: policy: avoid warning splat when merging nodes syzbot reported a splat: xfrm_policy_inexact_list_reinsert+0x625/0x6e0 net/xfrm/xfrm_policy.c:877 CPU: 1 PID: 6756 Comm: syz-executor.1 Not tainted 5.3.0-rc2+ #57 Call Trace: xfrm_policy_inexact_node_reinsert net/xfrm/xfrm_policy.c:922 [inline] xfrm_policy_inexact_node_merge net/xfrm/xfrm_policy.c:958 [inline] xfrm_policy_inexact_insert_node+0x537/0xb50 net/xfrm/xfrm_policy.c:1023 xfrm_policy_inexact_alloc_chain+0x62b/0xbd0 net/xfrm/xfrm_policy.c:1139 xfrm_policy_inexact_insert+0xe8/0x1540 net/xfrm/xfrm_policy.c:1182 xfrm_policy_insert+0xdf/0xce0 net/xfrm/xfrm_policy.c:1574 xfrm_add_policy+0x4cf/0x9b0 net/xfrm/xfrm_user.c:1670 xfrm_user_rcv_msg+0x46b/0x720 net/xfrm/xfrm_user.c:2676 netlink_rcv_skb+0x1f0/0x460 net/netlink/af_netlink.c:2477 xfrm_netlink_rcv+0x74/0x90 net/xfrm/xfrm_user.c:2684 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0x809/0x9a0 net/netlink/af_netlink.c:1328 netlink_sendmsg+0xa70/0xd30 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg net/socket.c:657 [inline] There is no reproducer, however, the warning can be reproduced by adding rules with ever smaller prefixes. The sanity check ("does the policy match the node") uses the prefix value of the node before its updated to the smaller value. To fix this, update the prefix earlier. The bug has no impact on tree correctness, this is only to prevent a false warning. Reported-by: syzbot+8cc27ace5f6972910b31@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 6 ++++-- tools/testing/selftests/net/xfrm_policy.sh | 7 +++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8ca637a72697..0fa7c5ce3b2c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -912,6 +912,7 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net, } else if (delta > 0) { p = &parent->rb_right; } else { + bool same_prefixlen = node->prefixlen == n->prefixlen; struct xfrm_policy *tmp; hlist_for_each_entry(tmp, &n->hhead, bydst) { @@ -919,9 +920,11 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net, hlist_del_rcu(&tmp->bydst); } + node->prefixlen = prefixlen; + xfrm_policy_inexact_list_reinsert(net, node, family); - if (node->prefixlen == n->prefixlen) { + if (same_prefixlen) { kfree_rcu(n, rcu); return; } @@ -929,7 +932,6 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net, rb_erase(*p, new); kfree_rcu(n, rcu); n = node; - n->prefixlen = prefixlen; goto restart; } } diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 5445943bf07f..7a1bf94c5bd3 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -106,6 +106,13 @@ do_overlap() # # 10.0.0.0/24 and 10.0.1.0/24 nodes have been merged as 10.0.0.0/23. ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/23 dir fwd priority 200 action block + + # similar to above: add policies (with partially random address), with shrinking prefixes. + for p in 29 28 27;do + for k in $(seq 1 32); do + ip -net $ns xfrm policy add src 10.253.1.$((RANDOM%255))/$p dst 10.254.1.$((RANDOM%255))/$p dir fwd priority $((200+k)) action block 2>/dev/null + done + done } do_esp_policy_get_check() { From 2c231c0c1dec42192aca0f87f2dc68b8f0cbc7d2 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 16 Aug 2019 01:48:55 -0500 Subject: [PATCH 011/169] dmaengine: ti: dma-crossbar: Fix a memory leak bug In ti_dra7_xbar_probe(), 'rsv_events' is allocated through kcalloc(). Then of_property_read_u32_array() is invoked to search for the property. However, if this process fails, 'rsv_events' is not deallocated, leading to a memory leak bug. To fix this issue, free 'rsv_events' before returning the error. Signed-off-by: Wenwen Wang Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/1565938136-7249-1-git-send-email-wenwen@cs.uga.edu Signed-off-by: Vinod Koul --- drivers/dma/ti/dma-crossbar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ti/dma-crossbar.c b/drivers/dma/ti/dma-crossbar.c index ad2f0a4cd6a4..f255056696ee 100644 --- a/drivers/dma/ti/dma-crossbar.c +++ b/drivers/dma/ti/dma-crossbar.c @@ -391,8 +391,10 @@ static int ti_dra7_xbar_probe(struct platform_device *pdev) ret = of_property_read_u32_array(node, pname, (u32 *)rsv_events, nelm * 2); - if (ret) + if (ret) { + kfree(rsv_events); return ret; + } for (i = 0; i < nelm; i++) { ti_dra7_xbar_reserve(rsv_events[i][0], rsv_events[i][1], From 962411b05a6d3342aa649e39cda1704c1fc042c6 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 16 Aug 2019 01:56:08 -0500 Subject: [PATCH 012/169] dmaengine: ti: omap-dma: Add cleanup in omap_dma_probe() If devm_request_irq() fails to disable all interrupts, no cleanup is performed before retuning the error. To fix this issue, invoke omap_dma_free() to do the cleanup. Signed-off-by: Wenwen Wang Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/1565938570-7528-1-git-send-email-wenwen@cs.uga.edu Signed-off-by: Vinod Koul --- drivers/dma/ti/omap-dma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c index ba27802efcd0..d07c0d5de7a2 100644 --- a/drivers/dma/ti/omap-dma.c +++ b/drivers/dma/ti/omap-dma.c @@ -1540,8 +1540,10 @@ static int omap_dma_probe(struct platform_device *pdev) rc = devm_request_irq(&pdev->dev, irq, omap_dma_irq, IRQF_SHARED, "omap-dma-engine", od); - if (rc) + if (rc) { + omap_dma_free(od); return rc; + } } if (omap_dma_glbl_read(od, CAPS_0) & CAPS_0_SUPPORT_LL123) From bc624a06f0c5190bc37fec7d22cd82b43a579698 Mon Sep 17 00:00:00 2001 From: David Jander Date: Tue, 27 Aug 2019 06:46:28 +0000 Subject: [PATCH 013/169] gpio: pca953x: correct type of reg_direction The type of reg_direction needs to match the type of the regmap, which is u8. Fixes: 0f25fda840a9 ("gpio: pca953x: Zap ad-hoc reg_direction cache") Cc: Cc: Signed-off-by: David Jander Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 378b206d2dc9..30072a570bc2 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -604,7 +604,7 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d) u8 new_irqs; int level, i; u8 invert_irq_mask[MAX_BANK]; - int reg_direction[MAX_BANK]; + u8 reg_direction[MAX_BANK]; regmap_bulk_read(chip->regmap, chip->regs->direction, reg_direction, NBANK(chip)); @@ -679,7 +679,7 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending) bool pending_seen = false; bool trigger_seen = false; u8 trigger[MAX_BANK]; - int reg_direction[MAX_BANK]; + u8 reg_direction[MAX_BANK]; int ret, i; if (chip->driver_data & PCA_PCAL) { @@ -768,7 +768,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, { struct i2c_client *client = chip->client; struct irq_chip *irq_chip = &chip->irq_chip; - int reg_direction[MAX_BANK]; + u8 reg_direction[MAX_BANK]; int ret, i; if (!client->irq) From 438b6c20e6161a1a7542490baa093c86732f77d6 Mon Sep 17 00:00:00 2001 From: David Jander Date: Tue, 27 Aug 2019 06:46:29 +0000 Subject: [PATCH 014/169] gpio: pca953x: use pca953x_read_regs instead of regmap_bulk_read The register number needs to be translated for chips with more than 8 ports. This patch fixes a bug causing all chips with more than 8 GPIO pins to not work correctly. Fixes: 0f25fda840a9 ("gpio: pca953x: Zap ad-hoc reg_direction cache") Cc: Cc: Signed-off-by: David Jander Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 30072a570bc2..48fea4c68e8d 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -606,8 +606,7 @@ static void pca953x_irq_bus_sync_unlock(struct irq_data *d) u8 invert_irq_mask[MAX_BANK]; u8 reg_direction[MAX_BANK]; - regmap_bulk_read(chip->regmap, chip->regs->direction, reg_direction, - NBANK(chip)); + pca953x_read_regs(chip, chip->regs->direction, reg_direction); if (chip->driver_data & PCA_PCAL) { /* Enable latch on interrupt-enabled inputs */ @@ -710,8 +709,7 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, u8 *pending) return false; /* Remove output pins from the equation */ - regmap_bulk_read(chip->regmap, chip->regs->direction, reg_direction, - NBANK(chip)); + pca953x_read_regs(chip, chip->regs->direction, reg_direction); for (i = 0; i < NBANK(chip); i++) cur_stat[i] &= reg_direction[i]; @@ -789,8 +787,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, * interrupt. We have to rely on the previous read for * this purpose. */ - regmap_bulk_read(chip->regmap, chip->regs->direction, reg_direction, - NBANK(chip)); + pca953x_read_regs(chip, chip->regs->direction, reg_direction); for (i = 0; i < NBANK(chip); i++) chip->irq_stat[i] &= reg_direction[i]; mutex_init(&chip->irq_lock); From 274b924088e93593c76fb122d24bc0ef18d0ddf4 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Wed, 28 Aug 2019 11:49:46 -0400 Subject: [PATCH 015/169] libnvdimm/pfn: Fix namespace creation on misaligned addresses Yi reported[1] that after commit a3619190d62e ("libnvdimm/pfn: stop padding pmem namespaces to section alignment"), it was no longer possible to create a device dax namespace with a 1G alignment. The reason was that the pmem region was not itself 1G-aligned. The code happily skips past the first 512M, but fails to account for a now misaligned end offset (since space was allocated starting at that misaligned address, and extending for size GBs). Reintroduce end_trunc, so that the code correctly handles the misaligned end address. This results in the same behavior as before the introduction of the offending commit. [1] https://lists.01.org/pipermail/linux-nvdimm/2019-July/022813.html Fixes: a3619190d62e ("libnvdimm/pfn: stop padding pmem namespaces ...") Reported-and-tested-by: Yi Zhang Signed-off-by: Jeff Moyer Link: https://lore.kernel.org/r/x49ftll8f39.fsf@segfault.boston.devel.redhat.com Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 3e7b11cf1aae..cb98b8fe786e 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -655,6 +655,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) resource_size_t start, size; struct nd_region *nd_region; unsigned long npfns, align; + u32 end_trunc; struct nd_pfn_sb *pfn_sb; phys_addr_t offset; const char *sig; @@ -696,6 +697,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) size = resource_size(&nsio->res); npfns = PHYS_PFN(size - SZ_8K); align = max(nd_pfn->align, (1UL << SUBSECTION_SHIFT)); + end_trunc = start + size - ALIGN_DOWN(start + size, align); if (nd_pfn->mode == PFN_MODE_PMEM) { /* * The altmap should be padded out to the block size used @@ -714,7 +716,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) return -ENXIO; } - npfns = PHYS_PFN(size - offset); + npfns = PHYS_PFN(size - offset - end_trunc); pfn_sb->mode = cpu_to_le32(nd_pfn->mode); pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->npfns = cpu_to_le64(npfns); @@ -723,6 +725,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); pfn_sb->version_major = cpu_to_le16(1); pfn_sb->version_minor = cpu_to_le16(3); + pfn_sb->end_trunc = cpu_to_le32(end_trunc); pfn_sb->align = cpu_to_le32(nd_pfn->align); checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); pfn_sb->checksum = cpu_to_le64(checksum); From 807f6c8472da716e29744e1fa35480b08af3e4bc Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 24 Aug 2019 00:23:53 +0300 Subject: [PATCH 016/169] drm/ingenic: Hardcode panel type to DPI The ingenic driver supports DPI panels only at the moment, so hardcode their type to DPI instead of Unknown. Signed-off-by: Laurent Pinchart Signed-off-by: Paul Cercueil Link: https://patchwork.freedesktop.org/patch/msgid/20190823212353.29369-1-laurent.pinchart@ideasonboard.com # *** extracted tags *** Reviewed-by: Sam Ravnborg --- drivers/gpu/drm/ingenic/ingenic-drm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ingenic/ingenic-drm.c b/drivers/gpu/drm/ingenic/ingenic-drm.c index e9f9e9fb9b17..6381652a8829 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm.c @@ -656,10 +656,9 @@ static int ingenic_drm_probe(struct platform_device *pdev) return ret; } - if (panel) { + if (panel) bridge = devm_drm_panel_bridge_add(dev, panel, - DRM_MODE_CONNECTOR_Unknown); - } + DRM_MODE_CONNECTOR_DPI); priv->dma_hwdesc = dma_alloc_coherent(dev, sizeof(*priv->dma_hwdesc), &priv->dma_hwdesc_phys, From 333f31436d3db19f4286f8862a00ea1d8d8420a1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 29 Aug 2019 09:52:02 +0200 Subject: [PATCH 017/169] ALSA: hda - Fix potential endless loop at applying quirks Since the chained quirks via chained_before flag is applied before the depth check, it may lead to the endless recursive calls, when the chain were set up incorrectly. Fix it by moving the depth check at the beginning of the loop. Fixes: 1f57825077dc ("ALSA: hda - Add chained_before flag to the fixup entry") Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_auto_parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 92390d457567..18e6546b4467 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -824,6 +824,8 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth) while (id >= 0) { const struct hda_fixup *fix = codec->fixup_list + id; + if (++depth > 10) + break; if (fix->chained_before) apply_fixup(codec, fix->chain_id, action, depth + 1); @@ -863,8 +865,6 @@ static void apply_fixup(struct hda_codec *codec, int id, int action, int depth) } if (!fix->chained || fix->chained_before) break; - if (++depth > 10) - break; id = fix->chain_id; } } From 0622800d2ebccead42b3a85e255f7d473a36ec99 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 27 Aug 2019 14:28:23 -0700 Subject: [PATCH 018/169] scsi: lpfc: Raise config max for lpfc_fcp_mq_threshold variable Raise the config max for lpfc_fcp_mq_threshold variable to 256. Signed-off-by: Dick Kennedy Signed-off-by: James Smart CC: Hannes Reinecke Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 2 +- drivers/scsi/lpfc/lpfc_sli4.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 8d8c495b5b60..d65558619ab0 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -5715,7 +5715,7 @@ LPFC_ATTR_RW(nvme_embed_cmd, 1, 0, 2, * 0 = Set nr_hw_queues by the number of CPUs or HW queues. * 1,128 = Manually specify the maximum nr_hw_queue value to be set, * - * Value range is [0,128]. Default value is 8. + * Value range is [0,256]. Default value is 8. */ LPFC_ATTR_R(fcp_mq_threshold, LPFC_FCP_MQ_THRESHOLD_DEF, LPFC_FCP_MQ_THRESHOLD_MIN, LPFC_FCP_MQ_THRESHOLD_MAX, diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 329f7aa7e169..a81ef0293696 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -46,7 +46,7 @@ /* FCP MQ queue count limiting */ #define LPFC_FCP_MQ_THRESHOLD_MIN 0 -#define LPFC_FCP_MQ_THRESHOLD_MAX 128 +#define LPFC_FCP_MQ_THRESHOLD_MAX 256 #define LPFC_FCP_MQ_THRESHOLD_DEF 8 /* Common buffer size to accomidate SCSI and NVME IO buffers */ From 1ffdb51f28e8ec6be0a2b812c1765b5cf5c44a8f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 19 Aug 2019 12:04:08 -0500 Subject: [PATCH 019/169] Revert "Bluetooth: btusb: driver to enable the usb-wakeup feature" This reverts commit a0085f2510e8976614ad8f766b209448b385492f. This commit has caused regressions in notebooks that support suspend to idle such as the XPS 9360, XPS 9370 and XPS 9380. These notebooks will wakeup from suspend to idle from an unsolicited advertising packet from an unpaired BLE device. In a bug report it was sugggested that this is caused by a generic lack of LE privacy support. Revert this commit until that behavior can be avoided by the kernel. Fixes: a0085f2510e8 ("Bluetooth: btusb: driver to enable the usb-wakeup feature") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=200039 Link: https://marc.info/?l=linux-bluetooth&m=156441081612627&w=2 Link: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/750073/ CC: Bastien Nocera CC: Christian Kellner CC: Sukumar Ghorai Signed-off-by: Mario Limonciello Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 5cf0734eb31b..5c67d41ca254 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1170,10 +1170,6 @@ static int btusb_open(struct hci_dev *hdev) } data->intf->needs_remote_wakeup = 1; - /* device specific wakeup source enabled and required for USB - * remote wakeup while host is suspended - */ - device_wakeup_enable(&data->udev->dev); if (test_and_set_bit(BTUSB_INTR_RUNNING, &data->flags)) goto done; @@ -1238,7 +1234,6 @@ static int btusb_close(struct hci_dev *hdev) goto failed; data->intf->needs_remote_wakeup = 0; - device_wakeup_disable(&data->udev->dev); usb_autopm_put_interface(data->intf); failed: From 325d0ab3a1d1ad8b8c4f9f03fd866e55b0e24254 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 27 Aug 2019 13:58:47 +0200 Subject: [PATCH 020/169] drm/modes: Add a switch to differentiate free standing options Some extra command line options can be either specified without anything else on the command line (basically all the force connection options), but some other are only relevant when matched with a resolution (margin and interlace). Let's add a switch to restrict if needed the available option set. Fixes: e08ab74bd4c7 ("drm/modes: Rewrite the command line parser") Signed-off-by: Maxime Ripard Tested-by: Thomas Graichen Reviewed-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190827115850.25731-1-mripard@kernel.org --- drivers/gpu/drm/drm_modes.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index b0369e690f36..3d10425f6b18 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1454,6 +1454,7 @@ static int drm_mode_parse_cmdline_refresh(const char *str, char **end_ptr, } static int drm_mode_parse_cmdline_extra(const char *str, int length, + bool freestanding, const struct drm_connector *connector, struct drm_cmdline_mode *mode) { @@ -1462,9 +1463,15 @@ static int drm_mode_parse_cmdline_extra(const char *str, int length, for (i = 0; i < length; i++) { switch (str[i]) { case 'i': + if (freestanding) + return -EINVAL; + mode->interlace = true; break; case 'm': + if (freestanding) + return -EINVAL; + mode->margins = true; break; case 'D': @@ -1542,6 +1549,7 @@ static int drm_mode_parse_cmdline_res_mode(const char *str, unsigned int length, if (extras) { int ret = drm_mode_parse_cmdline_extra(end_ptr + i, 1, + false, connector, mode); if (ret) @@ -1811,7 +1819,7 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option, extra_ptr != options_ptr) { int len = strlen(name) - (extra_ptr - name); - ret = drm_mode_parse_cmdline_extra(extra_ptr, len, + ret = drm_mode_parse_cmdline_extra(extra_ptr, len, false, connector, mode); if (ret) return false; From 728a257f652aee5dd03ecde90b11f414a255e08b Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 27 Aug 2019 13:58:48 +0200 Subject: [PATCH 021/169] drm/modes: Fix the command line parser to take force options into account MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The command line parser when it has been rewritten introduced a regression when the only thing on the command line is an option to force the detection of a connector (such as video=HDMI-A-1:d), which are completely valid. It's been further broken by the support for the named modes which take anything that is not a resolution as a named mode. Let's fix this by running the extra command line option parser on the named modes if they only take a single character. Fixes: e08ab74bd4c7 ("drm/modes: Rewrite the command line parser") Reported-by: Jernej Škrabec Reported-by: Thomas Graichen Signed-off-by: Maxime Ripard Tested-by: Thomas Graichen Reviewed-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190827115850.25731-2-mripard@kernel.org --- drivers/gpu/drm/drm_modes.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 3d10425f6b18..27fe410bba5c 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1733,16 +1733,30 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option, * bunch of things: * - We need to make sure that the first character (which * would be our resolution in X) is a digit. - * - However, if the X resolution is missing, then we end up - * with something like x, with our first character - * being an alpha-numerical character, which would be - * considered a named mode. + * - If not, then it's either a named mode or a force on/off. + * To distinguish between the two, we need to run the + * extra parsing function, and if not, then we consider it + * a named mode. * * If this isn't enough, we should add more heuristics here, * and matching unit-tests. */ - if (!isdigit(name[0]) && name[0] != 'x') + if (!isdigit(name[0]) && name[0] != 'x') { + unsigned int namelen = strlen(name); + + /* + * Only the force on/off options can be in that case, + * and they all take a single character. + */ + if (namelen == 1) { + ret = drm_mode_parse_cmdline_extra(name, namelen, true, + connector, mode); + if (!ret) + return true; + } + named_mode = true; + } /* Try to locate the bpp and refresh specifiers, if any */ bpp_ptr = strchr(name, '-'); From 3764137906a5acece8b5546873a70b1e6263a1a6 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 27 Aug 2019 13:58:49 +0200 Subject: [PATCH 022/169] drm/modes: Introduce a whitelist for the named modes The named modes support has introduced a number of glitches that were in part due to the fact that the parser will take any string as a named mode. Since we shouldn't have a lot of options there (and they should be pretty standard), let's introduce a whitelist of the available named modes so that the kernel can differentiate between a poorly formed command line and a named mode. Signed-off-by: Maxime Ripard Tested-by: Thomas Graichen Reviewed-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190827115850.25731-3-mripard@kernel.org --- drivers/gpu/drm/drm_modes.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 27fe410bba5c..0d23bf729e9f 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1677,6 +1677,22 @@ static int drm_mode_parse_cmdline_options(char *str, size_t len, return 0; } +static const char *drm_named_modes_whitelist[] = { + "NTSC", + "PAL", +}; + +static bool drm_named_mode_is_in_whitelist(const char *mode, unsigned int size) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(drm_named_modes_whitelist); i++) + if (!strncmp(mode, drm_named_modes_whitelist[i], size)) + return true; + + return false; +} + /** * drm_mode_parse_command_line_for_connector - parse command line modeline for connector * @mode_option: optional per connector mode option @@ -1794,6 +1810,10 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option, if (named_mode) { if (mode_end + 1 > DRM_DISPLAY_MODE_LEN) return false; + + if (!drm_named_mode_is_in_whitelist(name, mode_end)) + return false; + strscpy(mode->name, name, mode_end + 1); } else { ret = drm_mode_parse_cmdline_res_mode(name, mode_end, From 424c38a4e32509ae82dc9d7300432295806cb911 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 27 Aug 2019 13:58:50 +0200 Subject: [PATCH 023/169] drm/selftests: modes: Add more unit tests for the cmdline parser Let's add some unit tests for the recent bugs we just fixed. Signed-off-by: Maxime Ripard Tested-by: Thomas Graichen Reviewed-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190827115850.25731-4-mripard@kernel.org --- .../gpu/drm/selftests/drm_cmdline_selftests.h | 7 + .../drm/selftests/test-drm_cmdline_parser.c | 130 ++++++++++++++++++ 2 files changed, 137 insertions(+) diff --git a/drivers/gpu/drm/selftests/drm_cmdline_selftests.h b/drivers/gpu/drm/selftests/drm_cmdline_selftests.h index b45824ec7c8f..6d61a0eb5d64 100644 --- a/drivers/gpu/drm/selftests/drm_cmdline_selftests.h +++ b/drivers/gpu/drm/selftests/drm_cmdline_selftests.h @@ -9,6 +9,13 @@ #define cmdline_test(test) selftest(test, test) +cmdline_test(drm_cmdline_test_force_d_only) +cmdline_test(drm_cmdline_test_force_D_only_dvi) +cmdline_test(drm_cmdline_test_force_D_only_hdmi) +cmdline_test(drm_cmdline_test_force_D_only_not_digital) +cmdline_test(drm_cmdline_test_force_e_only) +cmdline_test(drm_cmdline_test_margin_only) +cmdline_test(drm_cmdline_test_interlace_only) cmdline_test(drm_cmdline_test_res) cmdline_test(drm_cmdline_test_res_missing_x) cmdline_test(drm_cmdline_test_res_missing_y) diff --git a/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c b/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c index 14c96edb13df..013de9d27c35 100644 --- a/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c +++ b/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c @@ -17,6 +17,136 @@ static const struct drm_connector no_connector = {}; +static int drm_cmdline_test_force_e_only(void *ignored) +{ + struct drm_cmdline_mode mode = { }; + + FAIL_ON(!drm_mode_parse_command_line_for_connector("e", + &no_connector, + &mode)); + FAIL_ON(mode.specified); + FAIL_ON(mode.refresh_specified); + FAIL_ON(mode.bpp_specified); + + FAIL_ON(mode.rb); + FAIL_ON(mode.cvt); + FAIL_ON(mode.interlace); + FAIL_ON(mode.margins); + FAIL_ON(mode.force != DRM_FORCE_ON); + + return 0; +} + +static int drm_cmdline_test_force_D_only_not_digital(void *ignored) +{ + struct drm_cmdline_mode mode = { }; + + FAIL_ON(!drm_mode_parse_command_line_for_connector("D", + &no_connector, + &mode)); + FAIL_ON(mode.specified); + FAIL_ON(mode.refresh_specified); + FAIL_ON(mode.bpp_specified); + + FAIL_ON(mode.rb); + FAIL_ON(mode.cvt); + FAIL_ON(mode.interlace); + FAIL_ON(mode.margins); + FAIL_ON(mode.force != DRM_FORCE_ON); + + return 0; +} + +static const struct drm_connector connector_hdmi = { + .connector_type = DRM_MODE_CONNECTOR_HDMIB, +}; + +static int drm_cmdline_test_force_D_only_hdmi(void *ignored) +{ + struct drm_cmdline_mode mode = { }; + + FAIL_ON(!drm_mode_parse_command_line_for_connector("D", + &connector_hdmi, + &mode)); + FAIL_ON(mode.specified); + FAIL_ON(mode.refresh_specified); + FAIL_ON(mode.bpp_specified); + + FAIL_ON(mode.rb); + FAIL_ON(mode.cvt); + FAIL_ON(mode.interlace); + FAIL_ON(mode.margins); + FAIL_ON(mode.force != DRM_FORCE_ON_DIGITAL); + + return 0; +} + +static const struct drm_connector connector_dvi = { + .connector_type = DRM_MODE_CONNECTOR_DVII, +}; + +static int drm_cmdline_test_force_D_only_dvi(void *ignored) +{ + struct drm_cmdline_mode mode = { }; + + FAIL_ON(!drm_mode_parse_command_line_for_connector("D", + &connector_dvi, + &mode)); + FAIL_ON(mode.specified); + FAIL_ON(mode.refresh_specified); + FAIL_ON(mode.bpp_specified); + + FAIL_ON(mode.rb); + FAIL_ON(mode.cvt); + FAIL_ON(mode.interlace); + FAIL_ON(mode.margins); + FAIL_ON(mode.force != DRM_FORCE_ON_DIGITAL); + + return 0; +} + +static int drm_cmdline_test_force_d_only(void *ignored) +{ + struct drm_cmdline_mode mode = { }; + + FAIL_ON(!drm_mode_parse_command_line_for_connector("d", + &no_connector, + &mode)); + FAIL_ON(mode.specified); + FAIL_ON(mode.refresh_specified); + FAIL_ON(mode.bpp_specified); + + FAIL_ON(mode.rb); + FAIL_ON(mode.cvt); + FAIL_ON(mode.interlace); + FAIL_ON(mode.margins); + FAIL_ON(mode.force != DRM_FORCE_OFF); + + return 0; +} + +static int drm_cmdline_test_margin_only(void *ignored) +{ + struct drm_cmdline_mode mode = { }; + + FAIL_ON(drm_mode_parse_command_line_for_connector("m", + &no_connector, + &mode)); + + return 0; +} + +static int drm_cmdline_test_interlace_only(void *ignored) +{ + struct drm_cmdline_mode mode = { }; + + FAIL_ON(drm_mode_parse_command_line_for_connector("i", + &no_connector, + &mode)); + + return 0; +} + static int drm_cmdline_test_res(void *ignored) { struct drm_cmdline_mode mode = { }; From 89781d0806c2c4f29072d3f00cb2dd4274aabc3d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 30 Aug 2019 12:03:38 +0200 Subject: [PATCH 024/169] ALSA: hda/realtek - Fix overridden device-specific initialization The recent change to shuffle the codec initialization procedure for Realtek via commit 607ca3bd220f ("ALSA: hda/realtek - EAPD turn on later") caused the silent output on some machines. This change was supposed to be safe, but it isn't actually; some devices have quirk setups to override the EAPD via COEF or BTL in the additional verb table, which is applied at the beginning of snd_hda_gen_init(). And this EAPD setup is again overridden in alc_auto_init_amp(). For recovering from the regression, tell snd_hda_gen_init() not to apply the verbs there by a new flag, then apply the verbs in alc_init(). BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=204727 Fixes: 607ca3bd220f ("ALSA: hda/realtek - EAPD turn on later") Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_generic.c | 3 ++- sound/pci/hda/hda_generic.h | 1 + sound/pci/hda/patch_realtek.c | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 5bf24fb819d2..10d502328b76 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -6009,7 +6009,8 @@ int snd_hda_gen_init(struct hda_codec *codec) if (spec->init_hook) spec->init_hook(codec); - snd_hda_apply_verbs(codec); + if (!spec->skip_verbs) + snd_hda_apply_verbs(codec); init_multi_out(codec); init_extra_out(codec); diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index 5f199dcb0d18..fb9f1a90238b 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -243,6 +243,7 @@ struct hda_gen_spec { unsigned int indep_hp_enabled:1; /* independent HP enabled */ unsigned int have_aamix_ctl:1; unsigned int hp_mic_jack_modes:1; + unsigned int skip_verbs:1; /* don't apply verbs at snd_hda_gen_init() */ /* additional mute flags (only effective with auto_mute_via_amp=1) */ u64 mute_bits; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e333b3e30e31..03f903f9247d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -837,9 +837,11 @@ static int alc_init(struct hda_codec *codec) if (spec->init_hook) spec->init_hook(codec); + spec->gen.skip_verbs = 1; /* applied in below */ snd_hda_gen_init(codec); alc_fix_pll(codec); alc_auto_init_amp(codec, spec->init_amp); + snd_hda_apply_verbs(codec); /* apply verbs here after own init */ snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_INIT); From 0ce4a85f4f015d13a6a6b4954f4e8e63cd83f7b1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 26 Aug 2019 16:50:56 +0800 Subject: [PATCH 025/169] Revert "iommu/vt-d: Avoid duplicated pci dma alias consideration" This reverts commit 557529494d79f3f1fadd486dd18d2de0b19be4da. Commit 557529494d79f ("iommu/vt-d: Avoid duplicated pci dma alias consideration") aimed to address a NULL pointer deference issue happened when a thunderbolt device driver returned unexpectedly. Unfortunately, this change breaks a previous pci quirk added by commit cc346a4714a59 ("PCI: Add function 1 DMA alias quirk for Marvell devices"), as the result, devices like Marvell 88SE9128 SATA controller doesn't work anymore. We will continue to try to find the real culprit mentioned in 557529494d79f, but for now we should revert it to fix current breakage. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204627 Cc: Stijn Tintel Cc: Petr Vandrovec Reported-by: Stijn Tintel Reported-by: Petr Vandrovec Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 55 +++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 12d094d08c0a..c4e0e4a9ee9e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -339,6 +339,8 @@ static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); static void dmar_remove_one_dev_info(struct device *dev); static void __dmar_remove_one_dev_info(struct device_domain_info *info); +static void domain_context_clear(struct intel_iommu *iommu, + struct device *dev); static int domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu); static bool device_is_rmrr_locked(struct device *dev); @@ -2105,9 +2107,26 @@ static int domain_context_mapping_one(struct dmar_domain *domain, return ret; } +struct domain_context_mapping_data { + struct dmar_domain *domain; + struct intel_iommu *iommu; + struct pasid_table *table; +}; + +static int domain_context_mapping_cb(struct pci_dev *pdev, + u16 alias, void *opaque) +{ + struct domain_context_mapping_data *data = opaque; + + return domain_context_mapping_one(data->domain, data->iommu, + data->table, PCI_BUS_NUM(alias), + alias & 0xff); +} + static int domain_context_mapping(struct dmar_domain *domain, struct device *dev) { + struct domain_context_mapping_data data; struct pasid_table *table; struct intel_iommu *iommu; u8 bus, devfn; @@ -2117,7 +2136,17 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev) return -ENODEV; table = intel_pasid_get_table(dev); - return domain_context_mapping_one(domain, iommu, table, bus, devfn); + + if (!dev_is_pci(dev)) + return domain_context_mapping_one(domain, iommu, table, + bus, devfn); + + data.domain = domain; + data.iommu = iommu; + data.table = table; + + return pci_for_each_dma_alias(to_pci_dev(dev), + &domain_context_mapping_cb, &data); } static int domain_context_mapped_cb(struct pci_dev *pdev, @@ -4759,6 +4788,28 @@ int __init intel_iommu_init(void) return ret; } +static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque) +{ + struct intel_iommu *iommu = opaque; + + domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff); + return 0; +} + +/* + * NB - intel-iommu lacks any sort of reference counting for the users of + * dependent devices. If multiple endpoints have intersecting dependent + * devices, unbinding the driver from any one of them will possibly leave + * the others unable to operate. + */ +static void domain_context_clear(struct intel_iommu *iommu, struct device *dev) +{ + if (!iommu || !dev || !dev_is_pci(dev)) + return; + + pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu); +} + static void __dmar_remove_one_dev_info(struct device_domain_info *info) { struct dmar_domain *domain; @@ -4779,7 +4830,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) PASID_RID2PASID); iommu_disable_dev_iotlb(info); - domain_context_clear_one(iommu, info->bus, info->devfn); + domain_context_clear(iommu, info->dev); intel_pasid_free_table(info->dev); } From 52d083472e0b64d1da5b6469ed3defb1ddc45929 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 31 Aug 2019 10:00:11 +0200 Subject: [PATCH 026/169] clang-format: Update with the latest for_each macro list Re-run the shell fragment that generated the original list. Signed-off-by: Miguel Ojeda --- .clang-format | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/.clang-format b/.clang-format index 2ffd69afc1a8..196ca317bd1f 100644 --- a/.clang-format +++ b/.clang-format @@ -107,10 +107,13 @@ ForEachMacros: - 'css_for_each_descendant_post' - 'css_for_each_descendant_pre' - 'device_for_each_child_node' + - 'dma_fence_chain_for_each' - 'drm_atomic_crtc_for_each_plane' - 'drm_atomic_crtc_state_for_each_plane' - 'drm_atomic_crtc_state_for_each_plane_state' - 'drm_atomic_for_each_plane_damage' + - 'drm_client_for_each_connector_iter' + - 'drm_client_for_each_modeset' - 'drm_connector_for_each_possible_encoder' - 'drm_for_each_connector_iter' - 'drm_for_each_crtc' @@ -126,6 +129,7 @@ ForEachMacros: - 'drm_mm_for_each_node_in_range' - 'drm_mm_for_each_node_safe' - 'flow_action_for_each' + - 'for_each_active_dev_scope' - 'for_each_active_drhd_unit' - 'for_each_active_iommu' - 'for_each_available_child_of_node' @@ -153,6 +157,8 @@ ForEachMacros: - 'for_each_cpu_not' - 'for_each_cpu_wrap' - 'for_each_dev_addr' + - 'for_each_dev_scope' + - 'for_each_displayid_db' - 'for_each_dma_cap_mask' - 'for_each_dpcm_be' - 'for_each_dpcm_be_rollback' @@ -169,6 +175,8 @@ ForEachMacros: - 'for_each_evictable_lru' - 'for_each_fib6_node_rt_rcu' - 'for_each_fib6_walker_rt' + - 'for_each_free_mem_pfn_range_in_zone' + - 'for_each_free_mem_pfn_range_in_zone_from' - 'for_each_free_mem_range' - 'for_each_free_mem_range_reverse' - 'for_each_func_rsrc' @@ -178,6 +186,7 @@ ForEachMacros: - 'for_each_ip_tunnel_rcu' - 'for_each_irq_nr' - 'for_each_link_codecs' + - 'for_each_link_platforms' - 'for_each_lru' - 'for_each_matching_node' - 'for_each_matching_node_and_match' @@ -302,7 +311,10 @@ ForEachMacros: - 'ide_port_for_each_present_dev' - 'idr_for_each_entry' - 'idr_for_each_entry_continue' + - 'idr_for_each_entry_continue_ul' - 'idr_for_each_entry_ul' + - 'in_dev_for_each_ifa_rcu' + - 'in_dev_for_each_ifa_rtnl' - 'inet_bind_bucket_for_each' - 'inet_lhash2_for_each_icsk_rcu' - 'key_for_each' @@ -343,8 +355,6 @@ ForEachMacros: - 'media_device_for_each_intf' - 'media_device_for_each_link' - 'media_device_for_each_pad' - - 'mp_bvec_for_each_page' - - 'mp_bvec_for_each_segment' - 'nanddev_io_for_each_page' - 'netdev_for_each_lower_dev' - 'netdev_for_each_lower_private' @@ -381,18 +391,19 @@ ForEachMacros: - 'radix_tree_for_each_slot' - 'radix_tree_for_each_tagged' - 'rbtree_postorder_for_each_entry_safe' + - 'rdma_for_each_block' - 'rdma_for_each_port' - 'resource_list_for_each_entry' - 'resource_list_for_each_entry_safe' - 'rhl_for_each_entry_rcu' - 'rhl_for_each_rcu' - 'rht_for_each' - - 'rht_for_each_from' - 'rht_for_each_entry' - 'rht_for_each_entry_from' - 'rht_for_each_entry_rcu' - 'rht_for_each_entry_rcu_from' - 'rht_for_each_entry_safe' + - 'rht_for_each_from' - 'rht_for_each_rcu' - 'rht_for_each_rcu_from' - '__rq_for_each_bio' From ae688e1720fd387de34f2140a735917411672bf1 Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Fri, 30 Aug 2019 14:15:35 +0100 Subject: [PATCH 027/169] arm64: dts: renesas: hihope-common: Fix eMMC status SDHI3 got accidentally disabled while adding USB 2.0 support, this patch fixes it. Fixes: 734d277f412a ("arm64: dts: renesas: hihope-common: Add USB 2.0 support") Signed-off-by: Fabrizio Castro Reviewed-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/hihope-common.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/renesas/hihope-common.dtsi b/arch/arm64/boot/dts/renesas/hihope-common.dtsi index 3311a982fff8..23fd0224ca90 100644 --- a/arch/arm64/boot/dts/renesas/hihope-common.dtsi +++ b/arch/arm64/boot/dts/renesas/hihope-common.dtsi @@ -279,6 +279,7 @@ &sdhi3 { mmc-hs200-1_8v; non-removable; fixed-emmc-driver-type = <1>; + status = "okay"; }; &usb_extal_clk { From 55f7e5c364dce20e691fda329fb2a6cc3cbb63b6 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 2 Sep 2019 16:33:22 +1000 Subject: [PATCH 028/169] drm/nouveau/sec2/gp102: add missing MODULE_FIRMWAREs Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org [v5.2+] --- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c index 84a2f243ed9b..4695f1c8e33f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c @@ -190,6 +190,9 @@ MODULE_FIRMWARE("nvidia/gp102/nvdec/scrubber.bin"); MODULE_FIRMWARE("nvidia/gp102/sec2/desc.bin"); MODULE_FIRMWARE("nvidia/gp102/sec2/image.bin"); MODULE_FIRMWARE("nvidia/gp102/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/gp102/sec2/desc-1.bin"); +MODULE_FIRMWARE("nvidia/gp102/sec2/image-1.bin"); +MODULE_FIRMWARE("nvidia/gp102/sec2/sig-1.bin"); MODULE_FIRMWARE("nvidia/gp104/acr/bl.bin"); MODULE_FIRMWARE("nvidia/gp104/acr/unload_bl.bin"); MODULE_FIRMWARE("nvidia/gp104/acr/ucode_load.bin"); @@ -210,6 +213,9 @@ MODULE_FIRMWARE("nvidia/gp104/nvdec/scrubber.bin"); MODULE_FIRMWARE("nvidia/gp104/sec2/desc.bin"); MODULE_FIRMWARE("nvidia/gp104/sec2/image.bin"); MODULE_FIRMWARE("nvidia/gp104/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/gp104/sec2/desc-1.bin"); +MODULE_FIRMWARE("nvidia/gp104/sec2/image-1.bin"); +MODULE_FIRMWARE("nvidia/gp104/sec2/sig-1.bin"); MODULE_FIRMWARE("nvidia/gp106/acr/bl.bin"); MODULE_FIRMWARE("nvidia/gp106/acr/unload_bl.bin"); MODULE_FIRMWARE("nvidia/gp106/acr/ucode_load.bin"); @@ -230,6 +236,9 @@ MODULE_FIRMWARE("nvidia/gp106/nvdec/scrubber.bin"); MODULE_FIRMWARE("nvidia/gp106/sec2/desc.bin"); MODULE_FIRMWARE("nvidia/gp106/sec2/image.bin"); MODULE_FIRMWARE("nvidia/gp106/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/gp106/sec2/desc-1.bin"); +MODULE_FIRMWARE("nvidia/gp106/sec2/image-1.bin"); +MODULE_FIRMWARE("nvidia/gp106/sec2/sig-1.bin"); MODULE_FIRMWARE("nvidia/gp107/acr/bl.bin"); MODULE_FIRMWARE("nvidia/gp107/acr/unload_bl.bin"); MODULE_FIRMWARE("nvidia/gp107/acr/ucode_load.bin"); @@ -250,3 +259,6 @@ MODULE_FIRMWARE("nvidia/gp107/nvdec/scrubber.bin"); MODULE_FIRMWARE("nvidia/gp107/sec2/desc.bin"); MODULE_FIRMWARE("nvidia/gp107/sec2/image.bin"); MODULE_FIRMWARE("nvidia/gp107/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/gp107/sec2/desc-1.bin"); +MODULE_FIRMWARE("nvidia/gp107/sec2/image-1.bin"); +MODULE_FIRMWARE("nvidia/gp107/sec2/sig-1.bin"); From 29d9a0b50736768f042752070e5cdf4e4d4c00df Mon Sep 17 00:00:00 2001 From: "John S. Gruber" Date: Mon, 2 Sep 2019 00:00:54 +0200 Subject: [PATCH 029/169] x86/boot: Preserve boot_params.secure_boot from sanitizing Commit a90118c445cc ("x86/boot: Save fields explicitly, zero out everything else") now zeroes the secure boot setting information (enabled/disabled/...) passed by the boot loader or by the kernel's EFI handover mechanism. The problem manifests itself with signed kernels using the EFI handoff protocol with grub and the kernel loses the information whether secure boot is enabled in the firmware, i.e., the log message "Secure boot enabled" becomes "Secure boot could not be determined". efi_main() arch/x86/boot/compressed/eboot.c sets this field early but it is subsequently zeroed by the above referenced commit. Include boot_params.secure_boot in the preserve field list. [ bp: restructure commit message and massage. ] Fixes: a90118c445cc ("x86/boot: Save fields explicitly, zero out everything else") Signed-off-by: John S. Gruber Signed-off-by: Borislav Petkov Reviewed-by: John Hubbard Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Juergen Gross Cc: Mark Brown Cc: stable Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/CAPotdmSPExAuQcy9iAHqX3js_fc4mMLQOTr5RBGvizyCOPcTQQ@mail.gmail.com --- arch/x86/include/asm/bootparam_utils.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index 9e5f3c722c33..981fe923a59f 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -70,6 +70,7 @@ static void sanitize_boot_params(struct boot_params *boot_params) BOOT_PARAM_PRESERVE(eddbuf_entries), BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries), BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer), + BOOT_PARAM_PRESERVE(secure_boot), BOOT_PARAM_PRESERVE(hdr), BOOT_PARAM_PRESERVE(e820_table), BOOT_PARAM_PRESERVE(eddbuf), From 9b8bd476e78e89c9ea26c3b435ad0201c3d7dbf5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Aug 2019 10:24:45 +0200 Subject: [PATCH 030/169] x86/uaccess: Don't leak the AC flags into __get_user() argument evaluation Identical to __put_user(); the __get_user() argument evalution will too leak UBSAN crud into the __uaccess_begin() / __uaccess_end() region. While uncommon this was observed to happen for: drivers/xen/gntdev.c: if (__get_user(old_status, batch->status[i])) where UBSAN added array bound checking. This complements commit: 6ae865615fc4 ("x86/uaccess: Dont leak the AC flag into __put_user() argument evaluation") Tested-by Sedat Dilek Reported-by: Randy Dunlap Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Poimboeuf Reviewed-by: Thomas Gleixner Cc: broonie@kernel.org Cc: sfr@canb.auug.org.au Cc: akpm@linux-foundation.org Cc: Randy Dunlap Cc: mhocko@suse.cz Cc: Josh Poimboeuf Link: https://lkml.kernel.org/r/20190829082445.GM2369@hirez.programming.kicks-ass.net --- arch/x86/include/asm/uaccess.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 9c4435307ff8..35c225ede0e4 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -444,8 +444,10 @@ __pu_label: \ ({ \ int __gu_err; \ __inttype(*(ptr)) __gu_val; \ + __typeof__(ptr) __gu_ptr = (ptr); \ + __typeof__(size) __gu_size = (size); \ __uaccess_begin_nospec(); \ - __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ + __get_user_size(__gu_val, __gu_ptr, __gu_size, __gu_err, -EFAULT); \ __uaccess_end(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ __builtin_expect(__gu_err, 0); \ From d33cd42d86671bed870827aa399aeb9f1da74119 Mon Sep 17 00:00:00 2001 From: Sam Bazley Date: Sun, 1 Sep 2019 03:31:30 +0100 Subject: [PATCH 031/169] ALSA: hda/realtek - Add quirk for HP Pavilion 15 HP Pavilion 15 (AMD Ryzen-based model) with 103c:84e7 needs the same quirk like HP Envy/Spectre x360 for enabling the mute LED over Mic3 pin. [ rearranged in the SSID number order by tiwai ] Signed-off-by: Sam Bazley Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 03f903f9247d..e47b5d2f694f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6981,6 +6981,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x82c0, "HP G3 mini premium", ALC221_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), + SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From eb3d8f42231aec65b64b079dd17bd6c008a3fe29 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 28 Aug 2019 11:26:13 -0400 Subject: [PATCH 032/169] NFS: Fix inode fileid checks in attribute revalidation code We want to throw out the attrbute if it refers to the mounted on fileid, and not the real fileid. However we do not want to block cache consistency updates from NFSv4 writes. Reported-by: Murphy Zhou Fixes: 7e10cc25bfa0 ("NFS: Don't refresh attributes with mounted-on-file...") Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c764cfe456e5..2a03bfeec10a 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1403,11 +1403,12 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) return 0; - /* No fileid? Just exit */ - if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) - return 0; + if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) { + /* Only a mounted-on-fileid? Just exit */ + if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) + return 0; /* Has the inode gone and changed behind our back? */ - if (nfsi->fileid != fattr->fileid) { + } else if (nfsi->fileid != fattr->fileid) { /* Is this perhaps the mounted-on fileid? */ if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) && nfsi->fileid == fattr->mounted_on_fileid) @@ -1807,11 +1808,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_display_fhandle_hash(NFS_FH(inode)), atomic_read(&inode->i_count), fattr->valid); - /* No fileid? Just exit */ - if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) - return 0; + if (!(fattr->valid & NFS_ATTR_FATTR_FILEID)) { + /* Only a mounted-on-fileid? Just exit */ + if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) + return 0; /* Has the inode gone and changed behind our back? */ - if (nfsi->fileid != fattr->fileid) { + } else if (nfsi->fileid != fattr->fileid) { /* Is this perhaps the mounted-on fileid? */ if ((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) && nfsi->fileid == fattr->mounted_on_fileid) From 4030b4c585c41eeefec7bd20ce3d0e100a0f2e4d Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Mon, 2 Sep 2019 20:41:43 +0800 Subject: [PATCH 033/169] x86/hyper-v: Fix overflow bug in fill_gva_list() When the 'start' parameter is >= 0xFF000000 on 32-bit systems, or >= 0xFFFFFFFF'FF000000 on 64-bit systems, fill_gva_list() gets into an infinite loop. With such inputs, 'cur' overflows after adding HV_TLB_FLUSH_UNIT and always compares as less than end. Memory is filled with guest virtual addresses until the system crashes. Fix this by never incrementing 'cur' to be larger than 'end'. Reported-by: Jong Hyun Park Signed-off-by: Tianyu Lan Reviewed-by: Michael Kelley Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 2ffd9e33ce4a ("x86/hyper-v: Use hypercall for remote TLB flush") Signed-off-by: Ingo Molnar --- arch/x86/hyperv/mmu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index e65d7fe6489f..5208ba49c89a 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -37,12 +37,14 @@ static inline int fill_gva_list(u64 gva_list[], int offset, * Lower 12 bits encode the number of additional * pages to flush (in addition to the 'cur' page). */ - if (diff >= HV_TLB_FLUSH_UNIT) + if (diff >= HV_TLB_FLUSH_UNIT) { gva_list[gva_n] |= ~PAGE_MASK; - else if (diff) + cur += HV_TLB_FLUSH_UNIT; + } else if (diff) { gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT; + cur = end; + } - cur += HV_TLB_FLUSH_UNIT; gva_n++; } while (cur < end); From e33b4325e60e146c2317a8b548cbd633239ff83b Mon Sep 17 00:00:00 2001 From: Yizhuo Date: Fri, 30 Aug 2019 19:00:48 -0700 Subject: [PATCH 034/169] net: stmmac: dwmac-sun8i: Variable "val" in function sun8i_dwmac_set_syscon() could be uninitialized In function sun8i_dwmac_set_syscon(), local variable "val" could be uninitialized if function regmap_field_read() returns -EINVAL. However, it will be used directly in the if statement, which is potentially unsafe. Signed-off-by: Yizhuo Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 4083019c547a..f97a4096f8fc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -873,7 +873,12 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) int ret; u32 reg, val; - regmap_field_read(gmac->regmap_field, &val); + ret = regmap_field_read(gmac->regmap_field, &val); + if (ret) { + dev_err(priv->device, "Fail to read from regmap field.\n"); + return ret; + } + reg = gmac->variant->default_syscon_value; if (reg != val) dev_warn(priv->device, From ff4dd081977da56566a848f071aed8fa92d604a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 Aug 2019 11:30:03 -0400 Subject: [PATCH 035/169] configfs: stash the data we need into configfs_buffer at open time simplifies the ->read()/->write()/->release() instances nicely Signed-off-by: Al Viro Signed-off-by: Christoph Hellwig --- fs/configfs/file.c | 227 +++++++++++++++++++-------------------------- 1 file changed, 94 insertions(+), 133 deletions(-) diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 61e4db4390a1..eee6281a7b35 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -39,24 +39,18 @@ struct configfs_buffer { bool write_in_progress; char *bin_buffer; int bin_buffer_size; + int cb_max_size; + struct config_item *item; + struct module *owner; + union { + struct configfs_attribute *attr; + struct configfs_bin_attribute *bin_attr; + }; }; -/** - * fill_read_buffer - allocate and fill buffer from item. - * @dentry: dentry pointer. - * @buffer: data buffer for file. - * - * Allocate @buffer->page, if it hasn't been already, then call the - * config_item's show() method to fill the buffer with this attribute's - * data. - * This is called only once, on the file's first read. - */ -static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buffer) +static int fill_read_buffer(struct configfs_buffer * buffer) { - struct configfs_attribute * attr = to_attr(dentry); - struct config_item * item = to_item(dentry->d_parent); - int ret = 0; ssize_t count; if (!buffer->page) @@ -64,15 +58,15 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf if (!buffer->page) return -ENOMEM; - count = attr->show(item, buffer->page); + count = buffer->attr->show(buffer->item, buffer->page); + if (count < 0) + return count; + if (WARN_ON_ONCE(count > (ssize_t)SIMPLE_ATTR_SIZE)) + return -EIO; - BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE); - if (count >= 0) { - buffer->needs_read_fill = 0; - buffer->count = count; - } else - ret = count; - return ret; + buffer->needs_read_fill = 0; + buffer->count = count; + return 0; } /** @@ -97,12 +91,13 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf static ssize_t configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct configfs_buffer * buffer = file->private_data; + struct configfs_buffer *buffer = file->private_data; ssize_t retval = 0; mutex_lock(&buffer->mutex); if (buffer->needs_read_fill) { - if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) + retval = fill_read_buffer(buffer); + if (retval) goto out; } pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", @@ -139,9 +134,6 @@ configfs_read_bin_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct configfs_buffer *buffer = file->private_data; - struct dentry *dentry = file->f_path.dentry; - struct config_item *item = to_item(dentry->d_parent); - struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry); ssize_t retval = 0; ssize_t len = min_t(size_t, count, PAGE_SIZE); @@ -156,14 +148,14 @@ configfs_read_bin_file(struct file *file, char __user *buf, if (buffer->needs_read_fill) { /* perform first read with buf == NULL to get extent */ - len = bin_attr->read(item, NULL, 0); + len = buffer->bin_attr->read(buffer->item, NULL, 0); if (len <= 0) { retval = len; goto out; } /* do not exceed the maximum value */ - if (bin_attr->cb_max_size && len > bin_attr->cb_max_size) { + if (buffer->cb_max_size && len > buffer->cb_max_size) { retval = -EFBIG; goto out; } @@ -176,7 +168,8 @@ configfs_read_bin_file(struct file *file, char __user *buf, buffer->bin_buffer_size = len; /* perform second read to fill buffer */ - len = bin_attr->read(item, buffer->bin_buffer, len); + len = buffer->bin_attr->read(buffer->item, + buffer->bin_buffer, len); if (len < 0) { retval = len; vfree(buffer->bin_buffer); @@ -226,25 +219,10 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size return error ? -EFAULT : count; } - -/** - * flush_write_buffer - push buffer to config_item. - * @dentry: dentry to the attribute - * @buffer: data buffer for file. - * @count: number of bytes - * - * Get the correct pointers for the config_item and the attribute we're - * dealing with, then call the store() method for the attribute, - * passing the buffer that we acquired in fill_write_buffer(). - */ - static int -flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size_t count) +flush_write_buffer(struct configfs_buffer *buffer, size_t count) { - struct configfs_attribute * attr = to_attr(dentry); - struct config_item * item = to_item(dentry->d_parent); - - return attr->store(item, buffer->page, count); + return buffer->attr->store(buffer->item, buffer->page, count); } @@ -268,13 +246,13 @@ flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size static ssize_t configfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct configfs_buffer * buffer = file->private_data; + struct configfs_buffer *buffer = file->private_data; ssize_t len; mutex_lock(&buffer->mutex); len = fill_write_buffer(buffer, buf, count); if (len > 0) - len = flush_write_buffer(file->f_path.dentry, buffer, len); + len = flush_write_buffer(buffer, len); if (len > 0) *ppos += len; mutex_unlock(&buffer->mutex); @@ -299,8 +277,6 @@ configfs_write_bin_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct configfs_buffer *buffer = file->private_data; - struct dentry *dentry = file->f_path.dentry; - struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry); void *tbuf = NULL; ssize_t len; @@ -316,8 +292,8 @@ configfs_write_bin_file(struct file *file, const char __user *buf, /* buffer grows? */ if (*ppos + count > buffer->bin_buffer_size) { - if (bin_attr->cb_max_size && - *ppos + count > bin_attr->cb_max_size) { + if (buffer->cb_max_size && + *ppos + count > buffer->cb_max_size) { len = -EFBIG; goto out; } @@ -349,31 +325,45 @@ configfs_write_bin_file(struct file *file, const char __user *buf, return len; } -static int check_perm(struct inode * inode, struct file * file, int type) +static int __configfs_open_file(struct inode *inode, struct file *file, int type) { - struct config_item *item = configfs_get_config_item(file->f_path.dentry->d_parent); - struct configfs_attribute * attr = to_attr(file->f_path.dentry); - struct configfs_bin_attribute *bin_attr = NULL; - struct configfs_buffer * buffer; - struct configfs_item_operations * ops = NULL; - int error = 0; + struct dentry *dentry = file->f_path.dentry; + struct configfs_attribute *attr; + struct configfs_buffer *buffer; + int error; - if (!item || !attr) - goto Einval; + error = -ENOMEM; + buffer = kzalloc(sizeof(struct configfs_buffer), GFP_KERNEL); + if (!buffer) + goto out; - if (type & CONFIGFS_ITEM_BIN_ATTR) - bin_attr = to_bin_attr(file->f_path.dentry); + error = -EINVAL; + buffer->item = configfs_get_config_item(dentry->d_parent); + if (!buffer->item) + goto out_free_buffer; - /* Grab the module reference for this attribute if we have one */ - if (!try_module_get(attr->ca_owner)) { - error = -ENODEV; - goto Done; + attr = to_attr(dentry); + if (!attr) + goto out_put_item; + + if (type & CONFIGFS_ITEM_BIN_ATTR) { + buffer->bin_attr = to_bin_attr(dentry); + buffer->cb_max_size = buffer->bin_attr->cb_max_size; + } else { + buffer->attr = attr; } - if (item->ci_type) - ops = item->ci_type->ct_item_ops; - else - goto Eaccess; + buffer->owner = attr->ca_owner; + /* Grab the module reference for this attribute if we have one */ + error = -ENODEV; + if (!try_module_get(buffer->owner)) + goto out_put_item; + + error = -EACCES; + if (!buffer->item->ci_type) + goto out_put_module; + + buffer->ops = buffer->item->ci_type->ct_item_ops; /* File needs write support. * The inode's perms must say it's ok, @@ -381,13 +371,11 @@ static int check_perm(struct inode * inode, struct file * file, int type) */ if (file->f_mode & FMODE_WRITE) { if (!(inode->i_mode & S_IWUGO)) - goto Eaccess; - + goto out_put_module; if ((type & CONFIGFS_ITEM_ATTR) && !attr->store) - goto Eaccess; - - if ((type & CONFIGFS_ITEM_BIN_ATTR) && !bin_attr->write) - goto Eaccess; + goto out_put_module; + if ((type & CONFIGFS_ITEM_BIN_ATTR) && !buffer->bin_attr->write) + goto out_put_module; } /* File needs read support. @@ -396,90 +384,65 @@ static int check_perm(struct inode * inode, struct file * file, int type) */ if (file->f_mode & FMODE_READ) { if (!(inode->i_mode & S_IRUGO)) - goto Eaccess; - + goto out_put_module; if ((type & CONFIGFS_ITEM_ATTR) && !attr->show) - goto Eaccess; - - if ((type & CONFIGFS_ITEM_BIN_ATTR) && !bin_attr->read) - goto Eaccess; + goto out_put_module; + if ((type & CONFIGFS_ITEM_BIN_ATTR) && !buffer->bin_attr->read) + goto out_put_module; } - /* No error? Great, allocate a buffer for the file, and store it - * it in file->private_data for easy access. - */ - buffer = kzalloc(sizeof(struct configfs_buffer),GFP_KERNEL); - if (!buffer) { - error = -ENOMEM; - goto Enomem; - } mutex_init(&buffer->mutex); buffer->needs_read_fill = 1; buffer->read_in_progress = false; buffer->write_in_progress = false; - buffer->ops = ops; file->private_data = buffer; - goto Done; + return 0; - Einval: - error = -EINVAL; - goto Done; - Eaccess: - error = -EACCES; - Enomem: - module_put(attr->ca_owner); - Done: - if (error && item) - config_item_put(item); +out_put_module: + module_put(buffer->owner); +out_put_item: + config_item_put(buffer->item); +out_free_buffer: + kfree(buffer); +out: return error; } static int configfs_release(struct inode *inode, struct file *filp) { - struct config_item * item = to_item(filp->f_path.dentry->d_parent); - struct configfs_attribute * attr = to_attr(filp->f_path.dentry); - struct module * owner = attr->ca_owner; - struct configfs_buffer * buffer = filp->private_data; + struct configfs_buffer *buffer = filp->private_data; - if (item) - config_item_put(item); - /* After this point, attr should not be accessed. */ - module_put(owner); - - if (buffer) { - if (buffer->page) - free_page((unsigned long)buffer->page); - mutex_destroy(&buffer->mutex); - kfree(buffer); - } + if (buffer->item) + config_item_put(buffer->item); + module_put(buffer->owner); + if (buffer->page) + free_page((unsigned long)buffer->page); + mutex_destroy(&buffer->mutex); + kfree(buffer); return 0; } static int configfs_open_file(struct inode *inode, struct file *filp) { - return check_perm(inode, filp, CONFIGFS_ITEM_ATTR); + return __configfs_open_file(inode, filp, CONFIGFS_ITEM_ATTR); } static int configfs_open_bin_file(struct inode *inode, struct file *filp) { - return check_perm(inode, filp, CONFIGFS_ITEM_BIN_ATTR); + return __configfs_open_file(inode, filp, CONFIGFS_ITEM_BIN_ATTR); } -static int configfs_release_bin_file(struct inode *inode, struct file *filp) +static int configfs_release_bin_file(struct inode *inode, struct file *file) { - struct configfs_buffer *buffer = filp->private_data; - struct dentry *dentry = filp->f_path.dentry; - struct config_item *item = to_item(dentry->d_parent); - struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry); - ssize_t len = 0; - int ret; + struct configfs_buffer *buffer = file->private_data; buffer->read_in_progress = false; if (buffer->write_in_progress) { buffer->write_in_progress = false; - len = bin_attr->write(item, buffer->bin_buffer, + /* result of ->release() is ignored */ + buffer->bin_attr->write(buffer->item, buffer->bin_buffer, buffer->bin_buffer_size); /* vfree on NULL is safe */ @@ -489,10 +452,8 @@ static int configfs_release_bin_file(struct inode *inode, struct file *filp) buffer->needs_read_fill = 1; } - ret = configfs_release(inode, filp); - if (len < 0) - return len; - return ret; + configfs_release(inode, file); + return 0; } From f19e4ed1e1edbfa3c9ccb9fed17759b7d6db24c6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 29 Aug 2019 23:13:30 -0400 Subject: [PATCH 036/169] configfs_register_group() shouldn't be (and isn't) called in rmdirable parts revert cc57c07343bd "configfs: fix registered group removal" It was an attempt to handle something that fundamentally doesn't work - configfs_register_group() should never be done in a part of tree that can be rmdir'ed. And in mainline it never had been, so let's not borrow trouble; the fix was racy anyway, it would take a lot more to make that work and desired semantics is not clear. Signed-off-by: Al Viro Signed-off-by: Christoph Hellwig --- fs/configfs/dir.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 92112915de8e..21a795679e20 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1771,16 +1771,6 @@ void configfs_unregister_group(struct config_group *group) struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *parent = group->cg_item.ci_parent->ci_dentry; - mutex_lock(&subsys->su_mutex); - if (!group->cg_item.ci_parent->ci_group) { - /* - * The parent has already been unlinked and detached - * due to a rmdir. - */ - goto unlink_group; - } - mutex_unlock(&subsys->su_mutex); - inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); spin_lock(&configfs_dirent_lock); configfs_detach_prep(dentry, NULL); @@ -1796,7 +1786,6 @@ void configfs_unregister_group(struct config_group *group) dput(dentry); mutex_lock(&subsys->su_mutex); -unlink_group: unlink_group(group); mutex_unlock(&subsys->su_mutex); } From 47320fbe11a6059ae502c9c16b668022fdb4cf76 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Aug 2019 19:56:13 -0400 Subject: [PATCH 037/169] configfs: new object reprsenting tree fragments Refcounted, hangs of configfs_dirent, created by operations that add fragments to configfs tree (mkdir and configfs_register_{subsystem,group}). Will be used in the next commit to provide exclusion between fragment removal and ->show/->store calls. Signed-off-by: Al Viro Signed-off-by: Christoph Hellwig --- fs/configfs/configfs_internal.h | 15 ++++- fs/configfs/dir.c | 105 +++++++++++++++++++++++++------- fs/configfs/file.c | 4 +- 3 files changed, 97 insertions(+), 27 deletions(-) diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index f752d83a9c44..520f1813e789 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -20,6 +20,15 @@ #include #include +struct configfs_fragment { + atomic_t frag_count; + struct rw_semaphore frag_sem; + bool frag_dead; +}; + +void put_fragment(struct configfs_fragment *); +struct configfs_fragment *get_fragment(struct configfs_fragment *); + struct configfs_dirent { atomic_t s_count; int s_dependent_count; @@ -34,6 +43,7 @@ struct configfs_dirent { #ifdef CONFIG_LOCKDEP int s_depth; #endif + struct configfs_fragment *s_frag; }; #define CONFIGFS_ROOT 0x0001 @@ -61,8 +71,8 @@ extern int configfs_create(struct dentry *, umode_t mode, void (*init)(struct in extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); extern int configfs_create_bin_file(struct config_item *, const struct configfs_bin_attribute *); -extern int configfs_make_dirent(struct configfs_dirent *, - struct dentry *, void *, umode_t, int); +extern int configfs_make_dirent(struct configfs_dirent *, struct dentry *, + void *, umode_t, int, struct configfs_fragment *); extern int configfs_dirent_is_ready(struct configfs_dirent *); extern void configfs_hash_and_remove(struct dentry * dir, const char * name); @@ -137,6 +147,7 @@ static inline void release_configfs_dirent(struct configfs_dirent * sd) { if (!(sd->s_type & CONFIGFS_ROOT)) { kfree(sd->s_iattr); + put_fragment(sd->s_frag); kmem_cache_free(configfs_dir_cachep, sd); } } diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 21a795679e20..1df65c2440da 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -151,11 +151,38 @@ configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) #endif /* CONFIG_LOCKDEP */ +static struct configfs_fragment *new_fragment(void) +{ + struct configfs_fragment *p; + + p = kmalloc(sizeof(struct configfs_fragment), GFP_KERNEL); + if (p) { + atomic_set(&p->frag_count, 1); + init_rwsem(&p->frag_sem); + p->frag_dead = false; + } + return p; +} + +void put_fragment(struct configfs_fragment *frag) +{ + if (frag && atomic_dec_and_test(&frag->frag_count)) + kfree(frag); +} + +struct configfs_fragment *get_fragment(struct configfs_fragment *frag) +{ + if (likely(frag)) + atomic_inc(&frag->frag_count); + return frag; +} + /* * Allocates a new configfs_dirent and links it to the parent configfs_dirent */ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd, - void *element, int type) + void *element, int type, + struct configfs_fragment *frag) { struct configfs_dirent * sd; @@ -175,6 +202,7 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *paren kmem_cache_free(configfs_dir_cachep, sd); return ERR_PTR(-ENOENT); } + sd->s_frag = get_fragment(frag); list_add(&sd->s_sibling, &parent_sd->s_children); spin_unlock(&configfs_dirent_lock); @@ -209,11 +237,11 @@ static int configfs_dirent_exists(struct configfs_dirent *parent_sd, int configfs_make_dirent(struct configfs_dirent * parent_sd, struct dentry * dentry, void * element, - umode_t mode, int type) + umode_t mode, int type, struct configfs_fragment *frag) { struct configfs_dirent * sd; - sd = configfs_new_dirent(parent_sd, element, type); + sd = configfs_new_dirent(parent_sd, element, type, frag); if (IS_ERR(sd)) return PTR_ERR(sd); @@ -260,7 +288,8 @@ static void init_symlink(struct inode * inode) * until it is validated by configfs_dir_set_ready() */ -static int configfs_create_dir(struct config_item *item, struct dentry *dentry) +static int configfs_create_dir(struct config_item *item, struct dentry *dentry, + struct configfs_fragment *frag) { int error; umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; @@ -273,7 +302,8 @@ static int configfs_create_dir(struct config_item *item, struct dentry *dentry) return error; error = configfs_make_dirent(p->d_fsdata, dentry, item, mode, - CONFIGFS_DIR | CONFIGFS_USET_CREATING); + CONFIGFS_DIR | CONFIGFS_USET_CREATING, + frag); if (unlikely(error)) return error; @@ -338,9 +368,10 @@ int configfs_create_link(struct configfs_symlink *sl, { int err = 0; umode_t mode = S_IFLNK | S_IRWXUGO; + struct configfs_dirent *p = parent->d_fsdata; - err = configfs_make_dirent(parent->d_fsdata, dentry, sl, mode, - CONFIGFS_ITEM_LINK); + err = configfs_make_dirent(p, dentry, sl, mode, + CONFIGFS_ITEM_LINK, p->s_frag); if (!err) { err = configfs_create(dentry, mode, init_symlink); if (err) { @@ -599,7 +630,8 @@ static int populate_attrs(struct config_item *item) static int configfs_attach_group(struct config_item *parent_item, struct config_item *item, - struct dentry *dentry); + struct dentry *dentry, + struct configfs_fragment *frag); static void configfs_detach_group(struct config_item *item); static void detach_groups(struct config_group *group) @@ -647,7 +679,8 @@ static void detach_groups(struct config_group *group) * try using vfs_mkdir. Just a thought. */ static int create_default_group(struct config_group *parent_group, - struct config_group *group) + struct config_group *group, + struct configfs_fragment *frag) { int ret; struct configfs_dirent *sd; @@ -663,7 +696,7 @@ static int create_default_group(struct config_group *parent_group, d_add(child, NULL); ret = configfs_attach_group(&parent_group->cg_item, - &group->cg_item, child); + &group->cg_item, child, frag); if (!ret) { sd = child->d_fsdata; sd->s_type |= CONFIGFS_USET_DEFAULT; @@ -677,13 +710,14 @@ static int create_default_group(struct config_group *parent_group, return ret; } -static int populate_groups(struct config_group *group) +static int populate_groups(struct config_group *group, + struct configfs_fragment *frag) { struct config_group *new_group; int ret = 0; list_for_each_entry(new_group, &group->default_groups, group_entry) { - ret = create_default_group(group, new_group); + ret = create_default_group(group, new_group, frag); if (ret) { detach_groups(group); break; @@ -797,11 +831,12 @@ static void link_group(struct config_group *parent_group, struct config_group *g */ static int configfs_attach_item(struct config_item *parent_item, struct config_item *item, - struct dentry *dentry) + struct dentry *dentry, + struct configfs_fragment *frag) { int ret; - ret = configfs_create_dir(item, dentry); + ret = configfs_create_dir(item, dentry, frag); if (!ret) { ret = populate_attrs(item); if (ret) { @@ -831,12 +866,13 @@ static void configfs_detach_item(struct config_item *item) static int configfs_attach_group(struct config_item *parent_item, struct config_item *item, - struct dentry *dentry) + struct dentry *dentry, + struct configfs_fragment *frag) { int ret; struct configfs_dirent *sd; - ret = configfs_attach_item(parent_item, item, dentry); + ret = configfs_attach_item(parent_item, item, dentry, frag); if (!ret) { sd = dentry->d_fsdata; sd->s_type |= CONFIGFS_USET_DIR; @@ -852,7 +888,7 @@ static int configfs_attach_group(struct config_item *parent_item, */ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD); configfs_adjust_dir_dirent_depth_before_populate(sd); - ret = populate_groups(to_config_group(item)); + ret = populate_groups(to_config_group(item), frag); if (ret) { configfs_detach_item(item); d_inode(dentry)->i_flags |= S_DEAD; @@ -1247,6 +1283,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode struct configfs_dirent *sd; const struct config_item_type *type; struct module *subsys_owner = NULL, *new_item_owner = NULL; + struct configfs_fragment *frag; char *name; sd = dentry->d_parent->d_fsdata; @@ -1265,6 +1302,12 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode goto out; } + frag = new_fragment(); + if (!frag) { + ret = -ENOMEM; + goto out; + } + /* Get a working ref for the duration of this function */ parent_item = configfs_get_config_item(dentry->d_parent); type = parent_item->ci_type; @@ -1367,9 +1410,9 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode spin_unlock(&configfs_dirent_lock); if (group) - ret = configfs_attach_group(parent_item, item, dentry); + ret = configfs_attach_group(parent_item, item, dentry, frag); else - ret = configfs_attach_item(parent_item, item, dentry); + ret = configfs_attach_item(parent_item, item, dentry, frag); spin_lock(&configfs_dirent_lock); sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; @@ -1406,6 +1449,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode * reference. */ config_item_put(parent_item); + put_fragment(frag); out: return ret; @@ -1574,7 +1618,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file) */ err = -ENOENT; if (configfs_dirent_is_ready(parent_sd)) { - file->private_data = configfs_new_dirent(parent_sd, NULL, 0); + file->private_data = configfs_new_dirent(parent_sd, NULL, 0, NULL); if (IS_ERR(file->private_data)) err = PTR_ERR(file->private_data); else @@ -1732,8 +1776,13 @@ int configfs_register_group(struct config_group *parent_group, { struct configfs_subsystem *subsys = parent_group->cg_subsys; struct dentry *parent; + struct configfs_fragment *frag; int ret; + frag = new_fragment(); + if (!frag) + return -ENOMEM; + mutex_lock(&subsys->su_mutex); link_group(parent_group, group); mutex_unlock(&subsys->su_mutex); @@ -1741,7 +1790,7 @@ int configfs_register_group(struct config_group *parent_group, parent = parent_group->cg_item.ci_dentry; inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); - ret = create_default_group(parent_group, group); + ret = create_default_group(parent_group, group, frag); if (ret) goto err_out; @@ -1749,12 +1798,14 @@ int configfs_register_group(struct config_group *parent_group, configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); spin_unlock(&configfs_dirent_lock); inode_unlock(d_inode(parent)); + put_fragment(frag); return 0; err_out: inode_unlock(d_inode(parent)); mutex_lock(&subsys->su_mutex); unlink_group(group); mutex_unlock(&subsys->su_mutex); + put_fragment(frag); return ret; } EXPORT_SYMBOL(configfs_register_group); @@ -1842,10 +1893,17 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) struct dentry *dentry; struct dentry *root; struct configfs_dirent *sd; + struct configfs_fragment *frag; + + frag = new_fragment(); + if (!frag) + return -ENOMEM; root = configfs_pin_fs(); - if (IS_ERR(root)) + if (IS_ERR(root)) { + put_fragment(frag); return PTR_ERR(root); + } if (!group->cg_item.ci_name) group->cg_item.ci_name = group->cg_item.ci_namebuf; @@ -1861,7 +1919,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) d_add(dentry, NULL); err = configfs_attach_group(sd->s_element, &group->cg_item, - dentry); + dentry, frag); if (err) { BUG_ON(d_inode(dentry)); d_drop(dentry); @@ -1879,6 +1937,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) unlink_group(group); configfs_release_fs(); } + put_fragment(frag); return err; } diff --git a/fs/configfs/file.c b/fs/configfs/file.c index eee6281a7b35..9d5e888df60b 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -488,7 +488,7 @@ int configfs_create_file(struct config_item * item, const struct configfs_attrib inode_lock_nested(d_inode(dir), I_MUTEX_NORMAL); error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, - CONFIGFS_ITEM_ATTR); + CONFIGFS_ITEM_ATTR, parent_sd->s_frag); inode_unlock(d_inode(dir)); return error; @@ -510,7 +510,7 @@ int configfs_create_bin_file(struct config_item *item, inode_lock_nested(dir->d_inode, I_MUTEX_NORMAL); error = configfs_make_dirent(parent_sd, NULL, (void *) bin_attr, mode, - CONFIGFS_ITEM_BIN_ATTR); + CONFIGFS_ITEM_BIN_ATTR, parent_sd->s_frag); inode_unlock(dir->d_inode); return error; From 48bd0d68cd4b849c0bb99dc1fb01b013ad4aa942 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Sat, 31 Aug 2019 01:40:33 -0300 Subject: [PATCH 038/169] netfilter: bridge: Drops IPv6 packets if IPv6 module is not loaded A kernel panic can happen if a host has disabled IPv6 on boot and have to process guest packets (coming from a bridge) using it's ip6tables. IPv6 packets need to be dropped if the IPv6 module is not loaded, and the host ip6tables will be used. Signed-off-by: Leonardo Bras Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index d3f9592f4ff8..af7800103e51 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -496,6 +496,10 @@ static unsigned int br_nf_pre_routing(void *priv, if (!brnet->call_ip6tables && !br_opt_get(br, BROPT_NF_CALL_IP6TABLES)) return NF_ACCEPT; + if (!ipv6_mod_enabled()) { + pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported."); + return NF_DROP; + } nf_bridge_pull_encap_header_rcsum(skb); return br_nf_pre_routing_ipv6(priv, skb, state); From 039b1f4f24ecc8493b6bb9d70b4b78750d1b35c2 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Sun, 1 Sep 2019 11:48:08 +0200 Subject: [PATCH 039/169] netfilter: nft_socket: fix erroneous socket assignment The socket assignment is wrong, see skb_orphan(): When skb->destructor callback is not set, but skb->sk is set, this hits BUG(). Link: https://bugzilla.redhat.com/show_bug.cgi?id=1651813 Fixes: 554ced0a6e29 ("netfilter: nf_tables: add support for native socket matching") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_socket.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index d7f3776dfd71..637ce3e8c575 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -47,9 +47,6 @@ static void nft_socket_eval(const struct nft_expr *expr, return; } - /* So that subsequent socket matching not to require other lookups. */ - skb->sk = sk; - switch(priv->key) { case NFT_SOCKET_TRANSPARENT: nft_reg_store8(dest, inet_sk_transparent(sk)); @@ -66,6 +63,9 @@ static void nft_socket_eval(const struct nft_expr *expr, WARN_ON(1); regs->verdict.code = NFT_BREAK; } + + if (sk != skb->sk) + sock_gen_put(sk); } static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = { From 60083f9e94b2f28047d71ed778adf89357c1a8fb Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Mon, 2 Sep 2019 18:00:56 +0800 Subject: [PATCH 040/169] ALSA: hda/realtek - Enable internal speaker & headset mic of ASUS UX431FL Original pin node values of ASUS UX431FL with ALC294: 0x12 0xb7a60140 0x13 0x40000000 0x14 0x90170110 0x15 0x411111f0 0x16 0x411111f0 0x17 0x90170111 0x18 0x411111f0 0x19 0x411111f0 0x1a 0x411111f0 0x1b 0x411111f0 0x1d 0x4066852d 0x1e 0x411111f0 0x1f 0x411111f0 0x21 0x04211020 1. Has duplicated internal speakers (0x14 & 0x17) which makes the output route become confused. So, the output volume cannot be changed by setting. 2. Misses the headset mic pin node. This patch disables the confusing speaker (NID 0x14) and enables the headset mic (NID 0x19). Link: https://lore.kernel.org/r/20190902100054.6941-1-jian-hong@endlessm.com Signed-off-by: Jian-Hong Pan Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e47b5d2f694f..453e203ac7b9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5799,6 +5799,7 @@ enum { ALC286_FIXUP_ACER_AIO_HEADSET_MIC, ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, ALC299_FIXUP_PREDATOR_SPK, + ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -6839,6 +6840,16 @@ static const struct hda_fixup alc269_fixups[] = { { } } }, + [ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x14, 0x411111f0 }, /* disable confusing internal speaker */ + { 0x19, 0x04a11150 }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -6998,6 +7009,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC), + SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), From 5e2d2cc2588bd3307ce3937acbc2ed03c830a861 Mon Sep 17 00:00:00 2001 From: Liangyan Date: Mon, 26 Aug 2019 20:16:33 +0800 Subject: [PATCH 041/169] sched/fair: Don't assign runtime for throttled cfs_rq do_sched_cfs_period_timer() will refill cfs_b runtime and call distribute_cfs_runtime to unthrottle cfs_rq, sometimes cfs_b->runtime will allocate all quota to one cfs_rq incorrectly, then other cfs_rqs attached to this cfs_b can't get runtime and will be throttled. We find that one throttled cfs_rq has non-negative cfs_rq->runtime_remaining and cause an unexpetced cast from s64 to u64 in snippet: distribute_cfs_runtime() { runtime = -cfs_rq->runtime_remaining + 1; } The runtime here will change to a large number and consume all cfs_b->runtime in this cfs_b period. According to Ben Segall, the throttled cfs_rq can have account_cfs_rq_runtime called on it because it is throttled before idle_balance, and the idle_balance calls update_rq_clock to add time that is accounted to the task. This commit prevents cfs_rq to be assgined new runtime if it has been throttled until that distribute_cfs_runtime is called. Signed-off-by: Liangyan Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Reviewed-by: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: shanpeic@linux.alibaba.com Cc: stable@vger.kernel.org Cc: xlpang@linux.alibaba.com Fixes: d3d9dc330236 ("sched: Throttle entities exceeding their allowed bandwidth") Link: https://lkml.kernel.org/r/20190826121633.6538-1-liangyan.peng@linux.alibaba.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bc9cfeaac8bd..500f5db0de0b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4470,6 +4470,8 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) if (likely(cfs_rq->runtime_remaining > 0)) return; + if (cfs_rq->throttled) + return; /* * if we're unable to extend our runtime we resched so that the active * hierarchy can be throttled @@ -4673,6 +4675,9 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, if (!cfs_rq_throttled(cfs_rq)) goto next; + /* By the above check, this should never be true */ + SCHED_WARN_ON(cfs_rq->runtime_remaining > 0); + runtime = -cfs_rq->runtime_remaining + 1; if (runtime > remaining) runtime = remaining; From 8744daf4b0699b724ee0a56b313a6c0c4ea289e3 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Mon, 26 Aug 2019 08:53:29 -0700 Subject: [PATCH 042/169] iommu/vt-d: Remove global page flush support Global pages support is removed from VT-d spec 3.0. Since global pages G flag only affects first-level paging structures and because DMA request with PASID are only supported by VT-d spec. 3.0 and onward, we can safely remove global pages support. For kernel shared virtual address IOTLB invalidation, PASID granularity and page selective within PASID will be used. There is no global granularity supported. Without this fix, IOTLB invalidation will cause invalid descriptor error in the queued invalidation (QI) interface. Fixes: 1c4f88b7f1f9 ("iommu/vt-d: Shared virtual address in scalable mode") Reported-by: Sanjay K Kumar Signed-off-by: Jacob Pan Signed-off-by: Joerg Roedel --- drivers/iommu/intel-svm.c | 36 +++++++++++++++--------------------- include/linux/intel-iommu.h | 3 --- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 780de0caafe8..9b159132405d 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -100,24 +100,19 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) } static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, - unsigned long address, unsigned long pages, int ih, int gl) + unsigned long address, unsigned long pages, int ih) { struct qi_desc desc; - if (pages == -1) { - /* For global kernel pages we have to flush them in *all* PASIDs - * because that's the only option the hardware gives us. Despite - * the fact that they are actually only accessible through one. */ - if (gl) - desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | - QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | - QI_EIOTLB_TYPE; - else - desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | - QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | - QI_EIOTLB_TYPE; + /* + * Do PASID granu IOTLB invalidation if page selective capability is + * not available. + */ + if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap)) { + desc.qw0 = QI_EIOTLB_PASID(svm->pasid) | + QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | + QI_EIOTLB_TYPE; desc.qw1 = 0; } else { int mask = ilog2(__roundup_pow_of_two(pages)); @@ -127,7 +122,6 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE; desc.qw1 = QI_EIOTLB_ADDR(address) | - QI_EIOTLB_GL(gl) | QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask); } @@ -162,13 +156,13 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, - unsigned long pages, int ih, int gl) + unsigned long pages, int ih) { struct intel_svm_dev *sdev; rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) - intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl); + intel_flush_svm_range_dev(svm, sdev, address, pages, ih); rcu_read_unlock(); } @@ -180,7 +174,7 @@ static void intel_invalidate_range(struct mmu_notifier *mn, struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); intel_flush_svm_range(svm, start, - (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0); + (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0); } static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) @@ -203,7 +197,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) { intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid); - intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); + intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); } rcu_read_unlock(); @@ -425,7 +419,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) * large and has to be physically contiguous. So it's * hard to be as defensive as we might like. */ intel_pasid_tear_down_entry(iommu, dev, svm->pasid); - intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); + intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index f2ae8a006ff8..4fc6454f7ebb 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -346,7 +346,6 @@ enum { #define QI_PC_PASID_SEL (QI_PC_TYPE | QI_PC_GRAN(1)) #define QI_EIOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) -#define QI_EIOTLB_GL(gl) (((u64)gl) << 7) #define QI_EIOTLB_IH(ih) (((u64)ih) << 6) #define QI_EIOTLB_AM(am) (((u64)am)) #define QI_EIOTLB_PASID(pasid) (((u64)pasid) << 32) @@ -378,8 +377,6 @@ enum { #define QI_RESP_INVALID 0x1 #define QI_RESP_FAILURE 0xf -#define QI_GRAN_ALL_ALL 0 -#define QI_GRAN_NONG_ALL 1 #define QI_GRAN_NONG_PASID 2 #define QI_GRAN_PSI_PASID 3 From 34b0e9b767bfa09ae233ca0d6ceb299bf2e24600 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 13 Aug 2019 15:36:40 +0200 Subject: [PATCH 043/169] mt76: mt76x0e: don't use hw encryption for MT7630E Since 41634aa8d6db ("mt76: only schedule txqs from the tx tasklet") I can observe firmware hangs on MT7630E on station mode: tx stop functioning after minor activity (rx keep working) and on module unload device fail to stop with messages: [ 5446.141413] mt76x0e 0000:06:00.0: TX DMA did not stop [ 5449.176764] mt76x0e 0000:06:00.0: TX DMA did not stop Loading module again results in failure to associate with AP. Only machine power off / power on cycle can make device work again. It's unclear why commit 41634aa8d6db causes the problem, but it is related to HW encryption. Since issue is a firmware hang, that is super hard to debug, just disable HW encryption as fix for the issue. Fixes: 41634aa8d6db ("mt76: only schedule txqs from the tx tasklet") Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x0/pci.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c index 4585e1b756c2..6117e6ca08cb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c @@ -62,6 +62,19 @@ static void mt76x0e_stop(struct ieee80211_hw *hw) mt76x0e_stop_hw(dev); } +static int +mt76x0e_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, + struct ieee80211_vif *vif, struct ieee80211_sta *sta, + struct ieee80211_key_conf *key) +{ + struct mt76x02_dev *dev = hw->priv; + + if (is_mt7630(dev)) + return -EOPNOTSUPP; + + return mt76x02_set_key(hw, cmd, vif, sta, key); +} + static void mt76x0e_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 queues, bool drop) @@ -78,7 +91,7 @@ static const struct ieee80211_ops mt76x0e_ops = { .configure_filter = mt76x02_configure_filter, .bss_info_changed = mt76x02_bss_info_changed, .sta_state = mt76_sta_state, - .set_key = mt76x02_set_key, + .set_key = mt76x0e_set_key, .conf_tx = mt76x02_conf_tx, .sw_scan_start = mt76x02_sw_scan, .sw_scan_complete = mt76x02_sw_scan_complete, From 70702265a04aa0ce5a7bde77d13456209992b32f Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 13 Aug 2019 15:36:56 +0200 Subject: [PATCH 044/169] mt76: mt76x0e: disable 5GHz band for MT7630E MT7630E hardware does support 5GHz, but we do not properly configure phy for 5GHz channels. Scanning at this band not only do not show any APs but also can hang the firmware. Since vendor reference driver do not support 5GHz we don't know how properly configure 5GHz channels. So disable this band for MT7630E . Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c index 40c0d536e20d..9d4426f6905f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c @@ -59,6 +59,11 @@ static void mt76x0_set_chip_cap(struct mt76x02_dev *dev) dev_dbg(dev->mt76.dev, "mask out 2GHz support\n"); } + if (is_mt7630(dev)) { + dev->mt76.cap.has_5ghz = false; + dev_dbg(dev->mt76.dev, "mask out 5GHz support\n"); + } + if (!mt76x02_field_valid(nic_conf1 & 0xff)) nic_conf1 &= 0xff00; From 7caac62ed598a196d6ddf8d9c121e12e082cac3a Mon Sep 17 00:00:00 2001 From: Wen Huang Date: Wed, 28 Aug 2019 10:07:51 +0800 Subject: [PATCH 045/169] mwifiex: Fix three heap overflow at parsing element in cfg80211_ap_settings mwifiex_update_vs_ie(),mwifiex_set_uap_rates() and mwifiex_set_wmm_params() call memcpy() without checking the destination size.Since the source is given from user-space, this may trigger a heap buffer overflow. Fix them by putting the length check before performing memcpy(). This fix addresses CVE-2019-14814,CVE-2019-14815,CVE-2019-14816. Signed-off-by: Wen Huang Acked-by: Ganapathi Bhat Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/ie.c | 3 +++ drivers/net/wireless/marvell/mwifiex/uap_cmd.c | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/ie.c b/drivers/net/wireless/marvell/mwifiex/ie.c index 653d347a9a19..580387f9f12a 100644 --- a/drivers/net/wireless/marvell/mwifiex/ie.c +++ b/drivers/net/wireless/marvell/mwifiex/ie.c @@ -241,6 +241,9 @@ static int mwifiex_update_vs_ie(const u8 *ies, int ies_len, } vs_ie = (struct ieee_types_header *)vendor_ie; + if (le16_to_cpu(ie->ie_length) + vs_ie->len + 2 > + IEEE_MAX_IE_SIZE) + return -EINVAL; memcpy(ie->ie_buffer + le16_to_cpu(ie->ie_length), vs_ie, vs_ie->len + 2); le16_unaligned_add_cpu(&ie->ie_length, vs_ie->len + 2); diff --git a/drivers/net/wireless/marvell/mwifiex/uap_cmd.c b/drivers/net/wireless/marvell/mwifiex/uap_cmd.c index 18f7d9bf30b2..0939a8c8f3ab 100644 --- a/drivers/net/wireless/marvell/mwifiex/uap_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/uap_cmd.c @@ -265,6 +265,8 @@ mwifiex_set_uap_rates(struct mwifiex_uap_bss_param *bss_cfg, rate_ie = (void *)cfg80211_find_ie(WLAN_EID_SUPP_RATES, var_pos, len); if (rate_ie) { + if (rate_ie->len > MWIFIEX_SUPPORTED_RATES) + return; memcpy(bss_cfg->rates, rate_ie + 1, rate_ie->len); rate_len = rate_ie->len; } @@ -272,8 +274,11 @@ mwifiex_set_uap_rates(struct mwifiex_uap_bss_param *bss_cfg, rate_ie = (void *)cfg80211_find_ie(WLAN_EID_EXT_SUPP_RATES, params->beacon.tail, params->beacon.tail_len); - if (rate_ie) + if (rate_ie) { + if (rate_ie->len > MWIFIEX_SUPPORTED_RATES - rate_len) + return; memcpy(bss_cfg->rates + rate_len, rate_ie + 1, rate_ie->len); + } return; } @@ -391,6 +396,8 @@ mwifiex_set_wmm_params(struct mwifiex_private *priv, params->beacon.tail_len); if (vendor_ie) { wmm_ie = vendor_ie; + if (*(wmm_ie + 1) > sizeof(struct mwifiex_types_wmm_info)) + return; memcpy(&bss_cfg->wmm_info, wmm_ie + sizeof(struct ieee_types_header), *(wmm_ie + 1)); priv->wmm_enabled = 1; From 968dcfb4905245dc64d65312c0d17692fa087b99 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 29 Aug 2019 11:13:46 +0300 Subject: [PATCH 046/169] iwlwifi: assign directly to iwl_trans->cfg in QuZ detection We were erroneously assigning the new configuration to a local variable cfg, but that was not being assigned to anything, so the change was getting lost. Assign directly to iwl_trans->cfg instead. Fixes: 5a8c31aa6357 ("iwlwifi: pcie: fix recognition of QuZ devices") Cc: stable@vger.kernel.org # 5.2 Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index d9ed53b7c768..3b12e7ad35e1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1070,18 +1070,18 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* same thing for QuZ... */ if (iwl_trans->hw_rev == CSR_HW_REV_TYPE_QUZ) { - if (cfg == &iwl_ax101_cfg_qu_hr) - cfg = &iwl_ax101_cfg_quz_hr; - else if (cfg == &iwl_ax201_cfg_qu_hr) - cfg = &iwl_ax201_cfg_quz_hr; - else if (cfg == &iwl9461_2ac_cfg_qu_b0_jf_b0) - cfg = &iwl9461_2ac_cfg_quz_a0_jf_b0_soc; - else if (cfg == &iwl9462_2ac_cfg_qu_b0_jf_b0) - cfg = &iwl9462_2ac_cfg_quz_a0_jf_b0_soc; - else if (cfg == &iwl9560_2ac_cfg_qu_b0_jf_b0) - cfg = &iwl9560_2ac_cfg_quz_a0_jf_b0_soc; - else if (cfg == &iwl9560_2ac_160_cfg_qu_b0_jf_b0) - cfg = &iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc; + if (iwl_trans->cfg == &iwl_ax101_cfg_qu_hr) + iwl_trans->cfg = &iwl_ax101_cfg_quz_hr; + else if (iwl_trans->cfg == &iwl_ax201_cfg_qu_hr) + iwl_trans->cfg = &iwl_ax201_cfg_quz_hr; + else if (iwl_trans->cfg == &iwl9461_2ac_cfg_qu_b0_jf_b0) + iwl_trans->cfg = &iwl9461_2ac_cfg_quz_a0_jf_b0_soc; + else if (iwl_trans->cfg == &iwl9462_2ac_cfg_qu_b0_jf_b0) + iwl_trans->cfg = &iwl9462_2ac_cfg_quz_a0_jf_b0_soc; + else if (iwl_trans->cfg == &iwl9560_2ac_cfg_qu_b0_jf_b0) + iwl_trans->cfg = &iwl9560_2ac_cfg_quz_a0_jf_b0_soc; + else if (iwl_trans->cfg == &iwl9560_2ac_160_cfg_qu_b0_jf_b0) + iwl_trans->cfg = &iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc; } #endif From 14d5e14c8a6c257eb322ddeb294ac4c243a7d2e1 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 23 Aug 2019 14:48:03 +0200 Subject: [PATCH 047/169] rt2x00: clear up IV's on key removal After looking at code I realized that my previous fix 95844124385e ("rt2x00: clear IV's on start to fix AP mode regression") was incomplete. We can still have wrong IV's after re-keyring. To fix that, clear up IV's also on key removal. Fixes: 710e6cc1595e ("rt2800: do not nullify initialization vector data") Signed-off-by: Stanislaw Gruszka tested-by: Emil Karlson Signed-off-by: Kalle Valo --- .../net/wireless/ralink/rt2x00/rt2800lib.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index ecbe78b8027b..28e2de04834e 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -1654,13 +1654,18 @@ static void rt2800_config_wcid_attr_cipher(struct rt2x00_dev *rt2x00dev, offset = MAC_IVEIV_ENTRY(key->hw_key_idx); - rt2800_register_multiread(rt2x00dev, offset, - &iveiv_entry, sizeof(iveiv_entry)); - if ((crypto->cipher == CIPHER_TKIP) || - (crypto->cipher == CIPHER_TKIP_NO_MIC) || - (crypto->cipher == CIPHER_AES)) - iveiv_entry.iv[3] |= 0x20; - iveiv_entry.iv[3] |= key->keyidx << 6; + if (crypto->cmd == SET_KEY) { + rt2800_register_multiread(rt2x00dev, offset, + &iveiv_entry, sizeof(iveiv_entry)); + if ((crypto->cipher == CIPHER_TKIP) || + (crypto->cipher == CIPHER_TKIP_NO_MIC) || + (crypto->cipher == CIPHER_AES)) + iveiv_entry.iv[3] |= 0x20; + iveiv_entry.iv[3] |= key->keyidx << 6; + } else { + memset(&iveiv_entry, 0, sizeof(iveiv_entry)); + } + rt2800_register_multiwrite(rt2x00dev, offset, &iveiv_entry, sizeof(iveiv_entry)); } From 13fa451568ab9e8b3074ef741477c7938c713c42 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 29 Aug 2019 13:29:59 +0200 Subject: [PATCH 048/169] Revert "rt2800: enable TX_PIN_CFG_LNA_PE_ bits per band" This reverts commit 9ad3b55654455258a9463384edb40077439d879f. As reported by Sergey: "I got some problem after upgrade kernel to 5.2 version (debian testing linux-image-5.2.0-2-amd64). 5Ghz client stopped to see AP. Some tests with 1metre distance between client-AP: 2.4Ghz -22dBm, for 5Ghz - 53dBm !, for longer distance (8m + walls) 2.4 - 61dBm, 5Ghz not visible." It was identified that rx signal level degradation was caused by 9ad3b5565445 ("rt2800: enable TX_PIN_CFG_LNA_PE_ bits per band"). So revert this commit. Cc: # v5.1+ Reported-and-tested-by: Sergey Maranchuk Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index 28e2de04834e..f1cdcd61c54a 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -4242,24 +4242,18 @@ static void rt2800_config_channel(struct rt2x00_dev *rt2x00dev, switch (rt2x00dev->default_ant.rx_chain_num) { case 3: /* Turn on tertiary LNAs */ - rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_A2_EN, - rf->channel > 14); - rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_G2_EN, - rf->channel <= 14); + rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_A2_EN, 1); + rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_G2_EN, 1); /* fall-through */ case 2: /* Turn on secondary LNAs */ - rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_A1_EN, - rf->channel > 14); - rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_G1_EN, - rf->channel <= 14); + rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_A1_EN, 1); + rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_G1_EN, 1); /* fall-through */ case 1: /* Turn on primary LNAs */ - rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_A0_EN, - rf->channel > 14); - rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_G0_EN, - rf->channel <= 14); + rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_A0_EN, 1); + rt2x00_set_field32(&tx_pin, TX_PIN_CFG_LNA_PE_G0_EN, 1); break; } From 8b51dc7291473093c821195c4b6af85fadedbc2f Mon Sep 17 00:00:00 2001 From: Hui Peng Date: Mon, 19 Aug 2019 18:02:29 -0400 Subject: [PATCH 049/169] rsi: fix a double free bug in rsi_91x_deinit() `dev` (struct rsi_91x_usbdev *) field of adapter (struct rsi_91x_usbdev *) is allocated and initialized in `rsi_init_usb_interface`. If any error is detected in information read from the device side, `rsi_init_usb_interface` will be freed. However, in the higher level error handling code in `rsi_probe`, if error is detected, `rsi_91x_deinit` is called again, in which `dev` will be freed again, resulting double free. This patch fixes the double free by removing the free operation on `dev` in `rsi_init_usb_interface`, because `rsi_91x_deinit` is also used in `rsi_disconnect`, in that code path, the `dev` field is not (and thus needs to be) freed. This bug was found in v4.19, but is also present in the latest version of kernel. Fixes CVE-2019-15504. Reported-by: Hui Peng Reported-by: Mathias Payer Signed-off-by: Hui Peng Reviewed-by: Guenter Roeck Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_usb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index f5048d4b8cb6..760eaffeebd6 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -645,7 +645,6 @@ static int rsi_init_usb_interface(struct rsi_hw *adapter, kfree(rsi_dev->tx_buffer); fail_eps: - kfree(rsi_dev); return status; } From 8ad8e02c2fa70cfddc1ded53ba9001c9d444075d Mon Sep 17 00:00:00 2001 From: Jan Kaisrlik Date: Tue, 20 Aug 2019 13:42:29 +0200 Subject: [PATCH 050/169] Revert "mmc: core: do not retry CMD6 in __mmc_switch()" Turns out the commit 3a0681c7448b ("mmc: core: do not retry CMD6 in __mmc_switch()") breaks initialization of a Toshiba THGBMNG5 eMMC card, when using the meson-gx-mmc.c driver on a custom board based on Amlogic A113D. The CMD6 that switches the card into HS200 mode is then one that fails and according to the below printed messages from the log: [ 1.648951] mmc0: mmc_select_hs200 failed, error -84 [ 1.648988] mmc0: error -84 whilst initialising MMC card After some analyze, it turns out that adding a delay of ~5ms inside mmc_select_bus_width() but after mmc_compare_ext_csds() has been executed, also fixes the problem. Adding yet some more debug code, trying to figure out if potentially the card could be in a busy state, both by using CMD13 and ->card_busy() ops concluded that this was not the case. Therefore, let's simply revert the commit that dropped support for retrying of CMD6, as this also fixes the problem. Fixes: 3a0681c7448b ("mmc: core: do not retry CMD6 in __mmc_switch()") Cc: stable@vger.kernel.org Signed-off-by: Jan Kaisrlik Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index 74e4364bc9fb..09113b9ad679 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -564,7 +564,7 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, if (index == EXT_CSD_SANITIZE_START) cmd.sanitize_busy = true; - err = mmc_wait_for_cmd(host, &cmd, 0); + err = mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES); if (err) goto out; From 88209141392a4a2521a2f67c13d7db5e84efbb58 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Fri, 30 Aug 2019 15:13:53 -0300 Subject: [PATCH 051/169] netfilter: nft_fib_netdev: Terminate rule eval if protocol=IPv6 and ipv6 module is disabled If IPv6 is disabled on boot (ipv6.disable=1), but nft_fib_inet ends up dealing with a IPv6 packet, it causes a kernel panic in fib6_node_lookup_1(), crashing in bad_page_fault. The panic is caused by trying to deference a very low address (0x38 in ppc64le), due to ipv6.fib6_main_tbl = NULL. BUG: Kernel NULL pointer dereference at 0x00000038 The kernel panic was reproduced in a host that disabled IPv6 on boot and have to process guest packets (coming from a bridge) using it's ip6tables. Terminate rule evaluation when packet protocol is IPv6 but the ipv6 module is not loaded. Signed-off-by: Leonardo Bras Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_fib_netdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c index 2cf3f32fe6d2..a2e726ae7f07 100644 --- a/net/netfilter/nft_fib_netdev.c +++ b/net/netfilter/nft_fib_netdev.c @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -34,6 +35,8 @@ static void nft_fib_netdev_eval(const struct nft_expr *expr, } break; case ETH_P_IPV6: + if (!ipv6_mod_enabled()) + break; switch (priv->result) { case NFT_FIB_RESULT_OIF: case NFT_FIB_RESULT_OIFNAME: From b067fa009c884401d23846251031c1f14d8a9c77 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 2 Sep 2019 19:37:42 +0200 Subject: [PATCH 052/169] netfilter: ctnetlink: honor IPS_OFFLOAD flag If this flag is set, timeout and state are irrelevant to userspace. Fixes: 90964016e5d3 ("netfilter: nf_conntrack: add IPS_OFFLOAD status bit") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6aa01eb6fe99..e2d13cd18875 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -553,10 +553,8 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, goto nla_put_failure; if (ctnetlink_dump_status(skb, ct) < 0 || - ctnetlink_dump_timeout(skb, ct) < 0 || ctnetlink_dump_acct(skb, ct, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0 || - ctnetlink_dump_protoinfo(skb, ct) < 0 || ctnetlink_dump_helpinfo(skb, ct) < 0 || ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || @@ -568,6 +566,11 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, ctnetlink_dump_ct_synproxy(skb, ct) < 0) goto nla_put_failure; + if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) && + (ctnetlink_dump_timeout(skb, ct) < 0 || + ctnetlink_dump_protoinfo(skb, ct) < 0)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return skb->len; From 110e48725db6262f260f10727d0fb2d3d25895e4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 2 Sep 2019 19:37:43 +0200 Subject: [PATCH 053/169] netfilter: nf_flow_table: set default timeout after successful insertion Set up the default timeout for this new entry otherwise the garbage collector might quickly remove it right after the flowtable insertion. Fixes: ac2a66665e23 ("netfilter: add generic flow table infrastructure") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 80a8f9ae4c93..a0b4bf654de2 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -217,7 +217,7 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) return err; } - flow->timeout = (u32)jiffies; + flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; return 0; } EXPORT_SYMBOL_GPL(flow_offload_add); From 689379c2f383b1fdfdff03e84cf659daf62f2088 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Fri, 30 Aug 2019 15:37:45 +0800 Subject: [PATCH 054/169] dmaengine: sprd: Fix the DMA link-list configuration For the Spreadtrum DMA link-list mode, when the DMA engine got a slave hardware request, which will trigger the DMA engine to load the DMA configuration from the link-list memory automatically. But before the slave hardware request, the slave will get an incorrect residue due to the first node used to trigger the link-list was configured as the last source address and destination address. Thus we should make sure the first node was configured the start source address and destination address, which can fix this issue. Fixes: 4ac695464763 ("dmaengine: sprd: Support DMA link-list mode") Signed-off-by: Baolin Wang Link: https://lore.kernel.org/r/77868edb7aff9d5cb12ac3af8827ef2e244441a6.1567150471.git.baolin.wang@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/sprd-dma.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c index baac476c8622..525dc7338fe3 100644 --- a/drivers/dma/sprd-dma.c +++ b/drivers/dma/sprd-dma.c @@ -908,6 +908,7 @@ sprd_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, struct sprd_dma_chn *schan = to_sprd_dma_chan(chan); struct dma_slave_config *slave_cfg = &schan->slave_cfg; dma_addr_t src = 0, dst = 0; + dma_addr_t start_src = 0, start_dst = 0; struct sprd_dma_desc *sdesc; struct scatterlist *sg; u32 len = 0; @@ -954,6 +955,11 @@ sprd_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, dst = sg_dma_address(sg); } + if (!i) { + start_src = src; + start_dst = dst; + } + /* * The link-list mode needs at least 2 link-list * configurations. If there is only one sg, it doesn't @@ -970,8 +976,8 @@ sprd_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, } } - ret = sprd_dma_fill_desc(chan, &sdesc->chn_hw, 0, 0, src, dst, len, - dir, flags, slave_cfg); + ret = sprd_dma_fill_desc(chan, &sdesc->chn_hw, 0, 0, start_src, + start_dst, len, dir, flags, slave_cfg); if (ret) { kfree(sdesc); return NULL; From cf24aac38698bfa1d021afd3883df3c4c65143a4 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 2 Sep 2019 20:44:03 +0900 Subject: [PATCH 055/169] dmaengine: rcar-dmac: Fix DMACHCLR handling if iommu is mapped The commit 20c169aceb45 ("dmaengine: rcar-dmac: clear pertinence number of channels") forgets to clear the last channel by DMACHCLR in rcar_dmac_init() (and doesn't need to clear the first channel) if iommu is mapped to the device. So, this patch fixes it by using "channels_mask" bitfield. Note that the hardware and driver don't support more than 32 bits in DMACHCLR register anyway, so this patch should reject more than 32 channels in rcar_dmac_parse_of(). Fixes: 20c169aceb459575 ("dmaengine: rcar-dmac: clear pertinence number of channels") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Simon Horman Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/1567424643-26629-1-git-send-email-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Vinod Koul --- drivers/dma/sh/rcar-dmac.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 9c41a4e42575..1072c450c37a 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -192,6 +192,7 @@ struct rcar_dmac_chan { * @iomem: remapped I/O memory base * @n_channels: number of available channels * @channels: array of DMAC channels + * @channels_mask: bitfield of which DMA channels are managed by this driver * @modules: bitmask of client modules in use */ struct rcar_dmac { @@ -202,6 +203,7 @@ struct rcar_dmac { unsigned int n_channels; struct rcar_dmac_chan *channels; + unsigned int channels_mask; DECLARE_BITMAP(modules, 256); }; @@ -438,7 +440,7 @@ static int rcar_dmac_init(struct rcar_dmac *dmac) u16 dmaor; /* Clear all channels and enable the DMAC globally. */ - rcar_dmac_write(dmac, RCAR_DMACHCLR, GENMASK(dmac->n_channels - 1, 0)); + rcar_dmac_write(dmac, RCAR_DMACHCLR, dmac->channels_mask); rcar_dmac_write(dmac, RCAR_DMAOR, RCAR_DMAOR_PRI_FIXED | RCAR_DMAOR_DME); @@ -814,6 +816,9 @@ static void rcar_dmac_stop_all_chan(struct rcar_dmac *dmac) for (i = 0; i < dmac->n_channels; ++i) { struct rcar_dmac_chan *chan = &dmac->channels[i]; + if (!(dmac->channels_mask & BIT(i))) + continue; + /* Stop and reinitialize the channel. */ spin_lock_irq(&chan->lock); rcar_dmac_chan_halt(chan); @@ -1776,6 +1781,8 @@ static int rcar_dmac_chan_probe(struct rcar_dmac *dmac, return 0; } +#define RCAR_DMAC_MAX_CHANNELS 32 + static int rcar_dmac_parse_of(struct device *dev, struct rcar_dmac *dmac) { struct device_node *np = dev->of_node; @@ -1787,12 +1794,16 @@ static int rcar_dmac_parse_of(struct device *dev, struct rcar_dmac *dmac) return ret; } - if (dmac->n_channels <= 0 || dmac->n_channels >= 100) { + /* The hardware and driver don't support more than 32 bits in CHCLR */ + if (dmac->n_channels <= 0 || + dmac->n_channels >= RCAR_DMAC_MAX_CHANNELS) { dev_err(dev, "invalid number of channels %u\n", dmac->n_channels); return -EINVAL; } + dmac->channels_mask = GENMASK(dmac->n_channels - 1, 0); + return 0; } @@ -1802,7 +1813,6 @@ static int rcar_dmac_probe(struct platform_device *pdev) DMA_SLAVE_BUSWIDTH_2_BYTES | DMA_SLAVE_BUSWIDTH_4_BYTES | DMA_SLAVE_BUSWIDTH_8_BYTES | DMA_SLAVE_BUSWIDTH_16_BYTES | DMA_SLAVE_BUSWIDTH_32_BYTES | DMA_SLAVE_BUSWIDTH_64_BYTES; - unsigned int channels_offset = 0; struct dma_device *engine; struct rcar_dmac *dmac; struct resource *mem; @@ -1831,10 +1841,8 @@ static int rcar_dmac_probe(struct platform_device *pdev) * level we can't disable it selectively, so ignore channel 0 for now if * the device is part of an IOMMU group. */ - if (device_iommu_mapped(&pdev->dev)) { - dmac->n_channels--; - channels_offset = 1; - } + if (device_iommu_mapped(&pdev->dev)) + dmac->channels_mask &= ~BIT(0); dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels, sizeof(*dmac->channels), GFP_KERNEL); @@ -1892,8 +1900,10 @@ static int rcar_dmac_probe(struct platform_device *pdev) INIT_LIST_HEAD(&engine->channels); for (i = 0; i < dmac->n_channels; ++i) { - ret = rcar_dmac_chan_probe(dmac, &dmac->channels[i], - i + channels_offset); + if (!(dmac->channels_mask & BIT(i))) + continue; + + ret = rcar_dmac_chan_probe(dmac, &dmac->channels[i], i); if (ret < 0) goto error; } From 2a36c16efab254dd6017efeb35ad88ecc96f2328 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 4 Sep 2019 13:53:27 +0800 Subject: [PATCH 056/169] ALSA: hda/realtek - Fix the problem of two front mics on a ThinkCentre This ThinkCentre machine has a new realtek codec alc222, it is not in the support list, we add it in the realtek.c then this machine can apply FIXUPs for the realtek codec. And this machine has two front mics which can't be handled by PA so far, it uses the pin 0x18 and 0x19 as the front mics, as a result the existing FIXUP ALC294_FIXUP_LENOVO_MIC_LOCATION doesn't work on this machine. Fortunately another FIXUP ALC283_FIXUP_HEADSET_MIC also can change the location for one of the two mics on this machine. Link: https://lore.kernel.org/r/20190904055327.9883-1-hui.wang@canonical.com Signed-off-by: Hui Wang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 453e203ac7b9..c1ddfd2fac52 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7087,6 +7087,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x312a, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), + SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), @@ -8961,6 +8962,7 @@ static int patch_alc680(struct hda_codec *codec) static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0215, "ALC215", patch_alc269), HDA_CODEC_ENTRY(0x10ec0221, "ALC221", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0222, "ALC222", patch_alc269), HDA_CODEC_ENTRY(0x10ec0225, "ALC225", patch_alc269), HDA_CODEC_ENTRY(0x10ec0231, "ALC231", patch_alc269), HDA_CODEC_ENTRY(0x10ec0233, "ALC233", patch_alc269), From 93d2c4de8d8129b97ee1e1a222aedb0719d2fcd9 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 28 Aug 2019 13:36:59 +0800 Subject: [PATCH 057/169] vhost/test: fix build for vhost test Since below commit, callers need to specify the iov_limit in vhost_dev_init() explicitly. Fixes: b46a0bf78ad7 ("vhost: fix OOB in get_rx_bufs()") Cc: stable@vger.kernel.org Signed-off-by: Tiwei Bie Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 9e90e969af55..ac4f762c4f65 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -115,7 +115,7 @@ static int vhost_test_open(struct inode *inode, struct file *f) dev = &n->dev; vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ]; n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX); + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV); f->private_data = n; From 264b563b8675771834419057cbe076c1a41fb666 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 28 Aug 2019 13:37:00 +0800 Subject: [PATCH 058/169] vhost/test: fix build for vhost test Since vhost_exceeds_weight() was introduced, callers need to specify the packet weight and byte weight in vhost_dev_init(). Note that, the packet weight isn't counted in this patch to keep the original behavior unchanged. Fixes: e82b9b0727ff ("vhost: introduce vhost_exceeds_weight()") Cc: stable@vger.kernel.org Signed-off-by: Tiwei Bie Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/test.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index ac4f762c4f65..7804869c6a31 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -22,6 +22,12 @@ * Using this limit prevents one virtqueue from starving others. */ #define VHOST_TEST_WEIGHT 0x80000 +/* Max number of packets transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others with + * pkts. + */ +#define VHOST_TEST_PKT_WEIGHT 256 + enum { VHOST_TEST_VQ = 0, VHOST_TEST_VQ_MAX = 1, @@ -80,10 +86,8 @@ static void handle_vq(struct vhost_test *n) } vhost_add_used_and_signal(&n->dev, vq, head, 0); total_len += len; - if (unlikely(total_len >= VHOST_TEST_WEIGHT)) { - vhost_poll_queue(&vq->poll); + if (unlikely(vhost_exceeds_weight(vq, 0, total_len))) break; - } } mutex_unlock(&vq->mutex); @@ -115,7 +119,8 @@ static int vhost_test_open(struct inode *inode, struct file *f) dev = &n->dev; vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ]; n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV); + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV, + VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT); f->private_data = n; From 718be6bab26fb01fa0147a2b614a3eefd96a2957 Mon Sep 17 00:00:00 2001 From: ? jiang Date: Tue, 20 Aug 2019 02:51:23 +0000 Subject: [PATCH 059/169] virtio-net: lower min ring num_free for efficiency This change lowers ring buffer reclaim threshold from 1/2*queue to budget for better performance. According to our test with qemu + dpdk, packet dropping happens when the guest is not able to provide free buffer in avail ring timely with default 1/2*queue. The value in the patch has been tested and does show better performance. Test setup: iperf3 to generate packets to guest (total 30mins, pps 400k, UDP) avg packets drop before: 2842 avg packets drop after: 360(-87.3%) Further, current code suffers from a starvation problem: the amount of work done by try_fill_recv is not bounded by the budget parameter, thus (with large queues) once in a while userspace gets blocked for a long time while queue is being refilled. Trigger refills earlier to make sure the amount of work to do is limited. Signed-off-by: jiangkidd Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 4f3de0ac8b0b..ba98e0971b84 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1331,7 +1331,7 @@ static int virtnet_receive(struct receive_queue *rq, int budget, } } - if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) { + if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { if (!try_fill_recv(vi, rq, GFP_ATOMIC)) schedule_delayed_work(&vi->refill, 0); } From 896fc242bc1d261c1178838487a0a54b260625cc Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 20 Aug 2019 20:36:32 +0800 Subject: [PATCH 060/169] vhost: Remove unnecessary variable It is unnecessary to use ret variable to return the error code, just return the error code directly. Signed-off-by: Yunsheng Lin Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 0536f8526359..1ac9de250319 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -203,7 +203,6 @@ EXPORT_SYMBOL_GPL(vhost_poll_init); int vhost_poll_start(struct vhost_poll *poll, struct file *file) { __poll_t mask; - int ret = 0; if (poll->wqh) return 0; @@ -213,10 +212,10 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file) vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask)); if (mask & EPOLLERR) { vhost_poll_stop(poll); - ret = -EINVAL; + return -EINVAL; } - return ret; + return 0; } EXPORT_SYMBOL_GPL(vhost_poll_start); From 3d2c7d37047557175fb41de044091050b5f0d73b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sat, 10 Aug 2019 13:53:21 -0400 Subject: [PATCH 061/169] Revert "vhost: access vq metadata through kernel virtual address" This reverts commit 7f466032dc ("vhost: access vq metadata through kernel virtual address"). The commit caused a bunch of issues, and while commit 73f628ec9e ("vhost: disable metadata prefetch optimization") disabled the optimization it's not nice to keep lots of dead code around. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 515 +----------------------------------------- drivers/vhost/vhost.h | 41 ---- 2 files changed, 3 insertions(+), 553 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1ac9de250319..5dc174ac8cac 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -297,160 +297,6 @@ static void vhost_vq_meta_reset(struct vhost_dev *d) __vhost_vq_meta_reset(d->vqs[i]); } -#if VHOST_ARCH_CAN_ACCEL_UACCESS -static void vhost_map_unprefetch(struct vhost_map *map) -{ - kfree(map->pages); - map->pages = NULL; - map->npages = 0; - map->addr = NULL; -} - -static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) -{ - struct vhost_map *map[VHOST_NUM_ADDRS]; - int i; - - spin_lock(&vq->mmu_lock); - for (i = 0; i < VHOST_NUM_ADDRS; i++) { - map[i] = rcu_dereference_protected(vq->maps[i], - lockdep_is_held(&vq->mmu_lock)); - if (map[i]) - rcu_assign_pointer(vq->maps[i], NULL); - } - spin_unlock(&vq->mmu_lock); - - synchronize_rcu(); - - for (i = 0; i < VHOST_NUM_ADDRS; i++) - if (map[i]) - vhost_map_unprefetch(map[i]); - -} - -static void vhost_reset_vq_maps(struct vhost_virtqueue *vq) -{ - int i; - - vhost_uninit_vq_maps(vq); - for (i = 0; i < VHOST_NUM_ADDRS; i++) - vq->uaddrs[i].size = 0; -} - -static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr, - unsigned long start, - unsigned long end) -{ - if (unlikely(!uaddr->size)) - return false; - - return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size); -} - -static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, - int index, - unsigned long start, - unsigned long end) -{ - struct vhost_uaddr *uaddr = &vq->uaddrs[index]; - struct vhost_map *map; - int i; - - if (!vhost_map_range_overlap(uaddr, start, end)) - return; - - spin_lock(&vq->mmu_lock); - ++vq->invalidate_count; - - map = rcu_dereference_protected(vq->maps[index], - lockdep_is_held(&vq->mmu_lock)); - if (map) { - if (uaddr->write) { - for (i = 0; i < map->npages; i++) - set_page_dirty(map->pages[i]); - } - rcu_assign_pointer(vq->maps[index], NULL); - } - spin_unlock(&vq->mmu_lock); - - if (map) { - synchronize_rcu(); - vhost_map_unprefetch(map); - } -} - -static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq, - int index, - unsigned long start, - unsigned long end) -{ - if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end)) - return; - - spin_lock(&vq->mmu_lock); - --vq->invalidate_count; - spin_unlock(&vq->mmu_lock); -} - -static int vhost_invalidate_range_start(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct vhost_dev *dev = container_of(mn, struct vhost_dev, - mmu_notifier); - int i, j; - - if (!mmu_notifier_range_blockable(range)) - return -EAGAIN; - - for (i = 0; i < dev->nvqs; i++) { - struct vhost_virtqueue *vq = dev->vqs[i]; - - for (j = 0; j < VHOST_NUM_ADDRS; j++) - vhost_invalidate_vq_start(vq, j, - range->start, - range->end); - } - - return 0; -} - -static void vhost_invalidate_range_end(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct vhost_dev *dev = container_of(mn, struct vhost_dev, - mmu_notifier); - int i, j; - - for (i = 0; i < dev->nvqs; i++) { - struct vhost_virtqueue *vq = dev->vqs[i]; - - for (j = 0; j < VHOST_NUM_ADDRS; j++) - vhost_invalidate_vq_end(vq, j, - range->start, - range->end); - } -} - -static const struct mmu_notifier_ops vhost_mmu_notifier_ops = { - .invalidate_range_start = vhost_invalidate_range_start, - .invalidate_range_end = vhost_invalidate_range_end, -}; - -static void vhost_init_maps(struct vhost_dev *dev) -{ - struct vhost_virtqueue *vq; - int i, j; - - dev->mmu_notifier.ops = &vhost_mmu_notifier_ops; - - for (i = 0; i < dev->nvqs; ++i) { - vq = dev->vqs[i]; - for (j = 0; j < VHOST_NUM_ADDRS; j++) - RCU_INIT_POINTER(vq->maps[j], NULL); - } -} -#endif - static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { @@ -479,11 +325,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; - vq->invalidate_count = 0; __vhost_vq_meta_reset(vq); -#if VHOST_ARCH_CAN_ACCEL_UACCESS - vhost_reset_vq_maps(vq); -#endif } static int vhost_worker(void *data) @@ -633,9 +475,7 @@ void vhost_dev_init(struct vhost_dev *dev, INIT_LIST_HEAD(&dev->read_list); INIT_LIST_HEAD(&dev->pending_list); spin_lock_init(&dev->iotlb_lock); -#if VHOST_ARCH_CAN_ACCEL_UACCESS - vhost_init_maps(dev); -#endif + for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; @@ -644,7 +484,6 @@ void vhost_dev_init(struct vhost_dev *dev, vq->heads = NULL; vq->dev = dev; mutex_init(&vq->mutex); - spin_lock_init(&vq->mmu_lock); vhost_vq_reset(dev, vq); if (vq->handle_kick) vhost_poll_init(&vq->poll, vq->handle_kick, @@ -724,18 +563,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev) if (err) goto err_cgroup; -#if VHOST_ARCH_CAN_ACCEL_UACCESS - err = mmu_notifier_register(&dev->mmu_notifier, dev->mm); - if (err) - goto err_mmu_notifier; -#endif - return 0; - -#if VHOST_ARCH_CAN_ACCEL_UACCESS -err_mmu_notifier: - vhost_dev_free_iovecs(dev); -#endif err_cgroup: kthread_stop(worker); dev->worker = NULL; @@ -826,107 +654,6 @@ static void vhost_clear_msg(struct vhost_dev *dev) spin_unlock(&dev->iotlb_lock); } -#if VHOST_ARCH_CAN_ACCEL_UACCESS -static void vhost_setup_uaddr(struct vhost_virtqueue *vq, - int index, unsigned long uaddr, - size_t size, bool write) -{ - struct vhost_uaddr *addr = &vq->uaddrs[index]; - - addr->uaddr = uaddr; - addr->size = size; - addr->write = write; -} - -static void vhost_setup_vq_uaddr(struct vhost_virtqueue *vq) -{ - vhost_setup_uaddr(vq, VHOST_ADDR_DESC, - (unsigned long)vq->desc, - vhost_get_desc_size(vq, vq->num), - false); - vhost_setup_uaddr(vq, VHOST_ADDR_AVAIL, - (unsigned long)vq->avail, - vhost_get_avail_size(vq, vq->num), - false); - vhost_setup_uaddr(vq, VHOST_ADDR_USED, - (unsigned long)vq->used, - vhost_get_used_size(vq, vq->num), - true); -} - -static int vhost_map_prefetch(struct vhost_virtqueue *vq, - int index) -{ - struct vhost_map *map; - struct vhost_uaddr *uaddr = &vq->uaddrs[index]; - struct page **pages; - int npages = DIV_ROUND_UP(uaddr->size, PAGE_SIZE); - int npinned; - void *vaddr, *v; - int err; - int i; - - spin_lock(&vq->mmu_lock); - - err = -EFAULT; - if (vq->invalidate_count) - goto err; - - err = -ENOMEM; - map = kmalloc(sizeof(*map), GFP_ATOMIC); - if (!map) - goto err; - - pages = kmalloc_array(npages, sizeof(struct page *), GFP_ATOMIC); - if (!pages) - goto err_pages; - - err = EFAULT; - npinned = __get_user_pages_fast(uaddr->uaddr, npages, - uaddr->write, pages); - if (npinned > 0) - release_pages(pages, npinned); - if (npinned != npages) - goto err_gup; - - for (i = 0; i < npinned; i++) - if (PageHighMem(pages[i])) - goto err_gup; - - vaddr = v = page_address(pages[0]); - - /* For simplicity, fallback to userspace address if VA is not - * contigious. - */ - for (i = 1; i < npinned; i++) { - v += PAGE_SIZE; - if (v != page_address(pages[i])) - goto err_gup; - } - - map->addr = vaddr + (uaddr->uaddr & (PAGE_SIZE - 1)); - map->npages = npages; - map->pages = pages; - - rcu_assign_pointer(vq->maps[index], map); - /* No need for a synchronize_rcu(). This function should be - * called by dev->worker so we are serialized with all - * readers. - */ - spin_unlock(&vq->mmu_lock); - - return 0; - -err_gup: - kfree(pages); -err_pages: - kfree(map); -err: - spin_unlock(&vq->mmu_lock); - return err; -} -#endif - void vhost_dev_cleanup(struct vhost_dev *dev) { int i; @@ -956,16 +683,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev) kthread_stop(dev->worker); dev->worker = NULL; } - if (dev->mm) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - mmu_notifier_unregister(&dev->mmu_notifier, dev->mm); -#endif + if (dev->mm) mmput(dev->mm); - } -#if VHOST_ARCH_CAN_ACCEL_UACCESS - for (i = 0; i < dev->nvqs; i++) - vhost_uninit_vq_maps(dev->vqs[i]); -#endif dev->mm = NULL; } EXPORT_SYMBOL_GPL(vhost_dev_cleanup); @@ -1194,26 +913,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, static inline int vhost_put_avail_event(struct vhost_virtqueue *vq) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - *((__virtio16 *)&used->ring[vq->num]) = - cpu_to_vhost16(vq, vq->avail_idx); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), vhost_avail_event(vq)); } @@ -1222,27 +921,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq, struct vring_used_elem *head, int idx, int count) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - size_t size; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - size = count * sizeof(*head); - memcpy(used->ring + idx, head, size); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_copy_to_user(vq, vq->used->ring + idx, head, count * sizeof(*head)); } @@ -1250,25 +928,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq, static inline int vhost_put_used_flags(struct vhost_virtqueue *vq) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - used->flags = cpu_to_vhost16(vq, vq->used_flags); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), &vq->used->flags); } @@ -1276,25 +935,6 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq) static inline int vhost_put_used_idx(struct vhost_virtqueue *vq) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - used->idx = cpu_to_vhost16(vq, vq->last_used_idx); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), &vq->used->idx); } @@ -1340,50 +980,12 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d) static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq, __virtio16 *idx) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_avail *avail; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); - if (likely(map)) { - avail = map->addr; - *idx = avail->idx; - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_get_avail(vq, *idx, &vq->avail->idx); } static inline int vhost_get_avail_head(struct vhost_virtqueue *vq, __virtio16 *head, int idx) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_avail *avail; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); - if (likely(map)) { - avail = map->addr; - *head = avail->ring[idx & (vq->num - 1)]; - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_get_avail(vq, *head, &vq->avail->ring[idx & (vq->num - 1)]); } @@ -1391,98 +993,24 @@ static inline int vhost_get_avail_head(struct vhost_virtqueue *vq, static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq, __virtio16 *flags) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_avail *avail; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); - if (likely(map)) { - avail = map->addr; - *flags = avail->flags; - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_get_avail(vq, *flags, &vq->avail->flags); } static inline int vhost_get_used_event(struct vhost_virtqueue *vq, __virtio16 *event) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_avail *avail; - - if (!vq->iotlb) { - rcu_read_lock(); - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); - if (likely(map)) { - avail = map->addr; - *event = (__virtio16)avail->ring[vq->num]; - rcu_read_unlock(); - return 0; - } - rcu_read_unlock(); - } -#endif - return vhost_get_avail(vq, *event, vhost_used_event(vq)); } static inline int vhost_get_used_idx(struct vhost_virtqueue *vq, __virtio16 *idx) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - *idx = used->idx; - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_get_used(vq, *idx, &vq->used->idx); } static inline int vhost_get_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, int idx) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_desc *d; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]); - if (likely(map)) { - d = map->addr; - *desc = *(d + idx); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc)); } @@ -1823,32 +1351,12 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq, return true; } -#if VHOST_ARCH_CAN_ACCEL_UACCESS -static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq) -{ - struct vhost_map __rcu *map; - int i; - - for (i = 0; i < VHOST_NUM_ADDRS; i++) { - rcu_read_lock(); - map = rcu_dereference(vq->maps[i]); - rcu_read_unlock(); - if (unlikely(!map)) - vhost_map_prefetch(vq, i); - } -} -#endif - int vq_meta_prefetch(struct vhost_virtqueue *vq) { unsigned int num = vq->num; - if (!vq->iotlb) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - vhost_vq_map_prefetch(vq); -#endif + if (!vq->iotlb) return 1; - } return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) && @@ -2059,16 +1567,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d, mutex_lock(&vq->mutex); -#if VHOST_ARCH_CAN_ACCEL_UACCESS - /* Unregister MMU notifer to allow invalidation callback - * can access vq->uaddrs[] without holding a lock. - */ - if (d->mm) - mmu_notifier_unregister(&d->mmu_notifier, d->mm); - - vhost_uninit_vq_maps(vq); -#endif - switch (ioctl) { case VHOST_SET_VRING_NUM: r = vhost_vring_set_num(d, vq, argp); @@ -2080,13 +1578,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d, BUG(); } -#if VHOST_ARCH_CAN_ACCEL_UACCESS - vhost_setup_vq_uaddr(vq); - - if (d->mm) - mmu_notifier_register(&d->mmu_notifier, d->mm); -#endif - mutex_unlock(&vq->mutex); return r; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 42a8c2a13ab1..e9ed2722b633 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include struct vhost_work; typedef void (*vhost_work_fn_t)(struct vhost_work *work); @@ -83,24 +80,6 @@ enum vhost_uaddr_type { VHOST_NUM_ADDRS = 3, }; -struct vhost_map { - int npages; - void *addr; - struct page **pages; -}; - -struct vhost_uaddr { - unsigned long uaddr; - size_t size; - bool write; -}; - -#if defined(CONFIG_MMU_NOTIFIER) && ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0 -#define VHOST_ARCH_CAN_ACCEL_UACCESS 0 -#else -#define VHOST_ARCH_CAN_ACCEL_UACCESS 0 -#endif - /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -111,22 +90,7 @@ struct vhost_virtqueue { struct vring_desc __user *desc; struct vring_avail __user *avail; struct vring_used __user *used; - -#if VHOST_ARCH_CAN_ACCEL_UACCESS - /* Read by memory accessors, modified by meta data - * prefetching, MMU notifier and vring ioctl(). - * Synchonrized through mmu_lock (writers) and RCU (writers - * and readers). - */ - struct vhost_map __rcu *maps[VHOST_NUM_ADDRS]; - /* Read by MMU notifier, modified by vring ioctl(), - * synchronized through MMU notifier - * registering/unregistering. - */ - struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS]; -#endif const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS]; - struct file *kick; struct eventfd_ctx *call_ctx; struct eventfd_ctx *error_ctx; @@ -181,8 +145,6 @@ struct vhost_virtqueue { bool user_be; #endif u32 busyloop_timeout; - spinlock_t mmu_lock; - int invalidate_count; }; struct vhost_msg_node { @@ -196,9 +158,6 @@ struct vhost_msg_node { struct vhost_dev { struct mm_struct *mm; -#ifdef CONFIG_MMU_NOTIFIER - struct mmu_notifier mmu_notifier; -#endif struct mutex mutex; struct vhost_virtqueue **vqs; int nvqs; From 02fa5d7b17a761f53ef1eedfc254e1f33bd226b0 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 20 Aug 2019 02:16:46 -0700 Subject: [PATCH 062/169] mm/balloon_compaction: suppress allocation warnings There is no reason to print warnings when balloon page allocation fails, as they are expected and can be handled gracefully. Since VMware balloon now uses balloon-compaction infrastructure, and suppressed these warnings before, it is also beneficial to suppress these warnings to keep the same behavior that the balloon had before. Cc: Jason Wang Signed-off-by: Nadav Amit Signed-off-by: Michael S. Tsirkin Reviewed-by: David Hildenbrand --- mm/balloon_compaction.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 798275a51887..26de020aae7b 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -124,7 +124,8 @@ EXPORT_SYMBOL_GPL(balloon_page_list_dequeue); struct page *balloon_page_alloc(void) { struct page *page = alloc_page(balloon_mapping_gfp_mask() | - __GFP_NOMEMALLOC | __GFP_NORETRY); + __GFP_NOMEMALLOC | __GFP_NORETRY | + __GFP_NOWARN); return page; } EXPORT_SYMBOL_GPL(balloon_page_alloc); From 8205d5d98ef7f155de211f5e2eb6ca03d95a5a60 Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Wed, 4 Sep 2019 00:55:27 -0400 Subject: [PATCH 063/169] powerpc/tm: Fix FP/VMX unavailable exceptions inside a transaction When we take an FP unavailable exception in a transaction we have to account for the hardware FP TM checkpointed registers being incorrect. In this case for this process we know the current and checkpointed FP registers must be the same (since FP wasn't used inside the transaction) hence in the thread_struct we copy the current FP registers to the checkpointed ones. This copy is done in tm_reclaim_thread(). We use thread->ckpt_regs.msr to determine if FP was on when in userspace. thread->ckpt_regs.msr represents the state of the MSR when exiting userspace. This is setup by check_if_tm_restore_required(). Unfortunatley there is an optimisation in giveup_all() which returns early if tsk->thread.regs->msr (via local variable `usermsr`) has FP=VEC=VSX=SPE=0. This optimisation means that check_if_tm_restore_required() is not called and hence thread->ckpt_regs.msr is not updated and will contain an old value. This can happen if due to load_fp=255 we start a userspace process with MSR FP=1 and then we are context switched out. In this case thread->ckpt_regs.msr will contain FP=1. If that same process is then context switched in and load_fp overflows, MSR will have FP=0. If that process now enters a transaction and does an FP instruction, the FP unavailable will not update thread->ckpt_regs.msr (the bug) and MSR FP=1 will be retained in thread->ckpt_regs.msr. tm_reclaim_thread() will then not perform the required memcpy and the checkpointed FP regs in the thread struct will contain the wrong values. The code path for this happening is: Userspace: Kernel Start userspace with MSR FP/VEC/VSX/SPE=0 TM=1 < ----- ... tbegin bne fp instruction FP unavailable ---- > fp_unavailable_tm() tm_reclaim_current() tm_reclaim_thread() giveup_all() return early since FP/VMX/VSX=0 /* ckpt MSR not updated (Incorrect) */ tm_reclaim() /* thread_struct ckpt FP regs contain junk (OK) */ /* Sees ckpt MSR FP=1 (Incorrect) */ no memcpy() performed /* thread_struct ckpt FP regs not fixed (Incorrect) */ tm_recheckpoint() /* Put junk in hardware checkpoint FP regs */ .... < ----- Return to userspace with MSR TM=1 FP=1 with junk in the FP TM checkpoint TM rollback reads FP junk This is a data integrity problem for the current process as the FP registers are corrupted. It's also a security problem as the FP registers from one process may be leaked to another. This patch moves up check_if_tm_restore_required() in giveup_all() to ensure thread->ckpt_regs.msr is updated correctly. A simple testcase to replicate this will be posted to tools/testing/selftests/powerpc/tm/tm-poison.c Similarly for VMX. This fixes CVE-2019-15030. Fixes: f48e91e87e67 ("powerpc/tm: Fix FP and VMX register corruption") Cc: stable@vger.kernel.org # 4.12+ Signed-off-by: Gustavo Romero Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190904045529.23002-1-gromero@linux.vnet.ibm.com --- arch/powerpc/kernel/process.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8fc4de0d22b4..437b57068cf8 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -497,13 +497,14 @@ void giveup_all(struct task_struct *tsk) if (!tsk->thread.regs) return; + check_if_tm_restore_required(tsk); + usermsr = tsk->thread.regs->msr; if ((usermsr & msr_all_available) == 0) return; msr_check_and_set(msr_all_available); - check_if_tm_restore_required(tsk); WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC))); From a8318c13e79badb92bc6640704a64cc022a6eb97 Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Wed, 4 Sep 2019 00:55:28 -0400 Subject: [PATCH 064/169] powerpc/tm: Fix restoring FP/VMX facility incorrectly on interrupts When in userspace and MSR FP=0 the hardware FP state is unrelated to the current process. This is extended for transactions where if tbegin is run with FP=0, the hardware checkpoint FP state will also be unrelated to the current process. Due to this, we need to ensure this hardware checkpoint is updated with the correct state before we enable FP for this process. Unfortunately we get this wrong when returning to a process from a hardware interrupt. A process that starts a transaction with FP=0 can take an interrupt. When the kernel returns back to that process, we change to FP=1 but with hardware checkpoint FP state not updated. If this transaction is then rolled back, the FP registers now contain the wrong state. The process looks like this: Userspace: Kernel Start userspace with MSR FP=0 TM=1 < ----- ... tbegin bne Hardware interrupt ---- > .... ret_from_except restore_math() /* sees FP=0 */ restore_fp() tm_active_with_fp() /* sees FP=1 (Incorrect) */ load_fp_state() FP = 0 -> 1 < ----- Return to userspace with MSR TM=1 FP=1 with junk in the FP TM checkpoint TM rollback reads FP junk When returning from the hardware exception, tm_active_with_fp() is incorrectly making restore_fp() call load_fp_state() which is setting FP=1. The fix is to remove tm_active_with_fp(). tm_active_with_fp() is attempting to handle the case where FP state has been changed inside a transaction. In this case the checkpointed and transactional FP state is different and hence we must restore the FP state (ie. we can't do lazy FP restore inside a transaction that's used FP). It's safe to remove tm_active_with_fp() as this case is handled by restore_tm_state(). restore_tm_state() detects if FP has been using inside a transaction and will set load_fp and call restore_math() to ensure the FP state (checkpoint and transaction) is restored. This is a data integrity problem for the current process as the FP registers are corrupted. It's also a security problem as the FP registers from one process may be leaked to another. Similarly for VMX. A simple testcase to replicate this will be posted to tools/testing/selftests/powerpc/tm/tm-poison.c This fixes CVE-2019-15031. Fixes: a7771176b439 ("powerpc: Don't enable FP/Altivec if not checkpointed") Cc: stable@vger.kernel.org # 4.15+ Signed-off-by: Gustavo Romero Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190904045529.23002-2-gromero@linux.vnet.ibm.com --- arch/powerpc/kernel/process.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 437b57068cf8..7a84c9f1778e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -101,21 +101,8 @@ static void check_if_tm_restore_required(struct task_struct *tsk) } } -static bool tm_active_with_fp(struct task_struct *tsk) -{ - return MSR_TM_ACTIVE(tsk->thread.regs->msr) && - (tsk->thread.ckpt_regs.msr & MSR_FP); -} - -static bool tm_active_with_altivec(struct task_struct *tsk) -{ - return MSR_TM_ACTIVE(tsk->thread.regs->msr) && - (tsk->thread.ckpt_regs.msr & MSR_VEC); -} #else static inline void check_if_tm_restore_required(struct task_struct *tsk) { } -static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; } -static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ bool strict_msr_control; @@ -252,7 +239,7 @@ EXPORT_SYMBOL(enable_kernel_fp); static int restore_fp(struct task_struct *tsk) { - if (tsk->thread.load_fp || tm_active_with_fp(tsk)) { + if (tsk->thread.load_fp) { load_fp_state(¤t->thread.fp_state); current->thread.load_fp++; return 1; @@ -334,8 +321,7 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread); static int restore_altivec(struct task_struct *tsk) { - if (cpu_has_feature(CPU_FTR_ALTIVEC) && - (tsk->thread.load_vec || tm_active_with_altivec(tsk))) { + if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) { load_vr_state(&tsk->thread.vr_state); tsk->thread.used_vr = 1; tsk->thread.load_vec++; From 6d0762b19c5963ff9e178e8af3626532ee04d93d Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Tue, 3 Sep 2019 17:10:42 +0800 Subject: [PATCH 065/169] Bluetooth: btrtl: Additional Realtek 8822CE Bluetooth devices The ASUS X412FA laptop contains a Realtek RTL8822CE device with an associated BT chip using a USB ID of 04ca:4005. This ID is added to the driver. The /sys/kernel/debug/usb/devices portion for this device is: T: Bus=01 Lev=01 Prnt=01 Port=09 Cnt=04 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 1.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=04ca ProdID=4005 Rev= 0.00 S: Manufacturer=Realtek S: Product=Bluetooth Radio S: SerialNumber=00e04c000001 C:* #Ifs= 2 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=204707 Signed-off-by: Jian-Hong Pan Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 5c67d41ca254..ba4149054304 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -384,6 +384,9 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x13d3, 0x3526), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x0b05, 0x185c), .driver_info = BTUSB_REALTEK }, + /* Additional Realtek 8822CE Bluetooth devices */ + { USB_DEVICE(0x04ca, 0x4005), .driver_info = BTUSB_REALTEK }, + /* Silicon Wave based devices */ { USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE }, From 31fb1bbdabb34d53e3d83f0353085d9b79977786 Mon Sep 17 00:00:00 2001 From: Harish Bandi Date: Wed, 4 Sep 2019 10:04:16 +0530 Subject: [PATCH 066/169] Bluetooth: hci_qca: disable irqs when spinlock is acquired Looks like Deadlock is observed in hci_qca while performing stress and stability tests. Since same lock is getting acquired from qca_wq_awake_rx and hci_ibs_tx_idle_timeout seeing spinlock recursion, irqs should be disable while acquiring the spinlock always. Signed-off-by: Harish Bandi Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_qca.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 9a970fd1975a..31dec435767b 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -309,13 +309,14 @@ static void qca_wq_awake_device(struct work_struct *work) ws_awake_device); struct hci_uart *hu = qca->hu; unsigned long retrans_delay; + unsigned long flags; BT_DBG("hu %p wq awake device", hu); /* Vote for serial clock */ serial_clock_vote(HCI_IBS_TX_VOTE_CLOCK_ON, hu); - spin_lock(&qca->hci_ibs_lock); + spin_lock_irqsave(&qca->hci_ibs_lock, flags); /* Send wake indication to device */ if (send_hci_ibs_cmd(HCI_IBS_WAKE_IND, hu) < 0) @@ -327,7 +328,7 @@ static void qca_wq_awake_device(struct work_struct *work) retrans_delay = msecs_to_jiffies(qca->wake_retrans); mod_timer(&qca->wake_retrans_timer, jiffies + retrans_delay); - spin_unlock(&qca->hci_ibs_lock); + spin_unlock_irqrestore(&qca->hci_ibs_lock, flags); /* Actually send the packets */ hci_uart_tx_wakeup(hu); @@ -338,12 +339,13 @@ static void qca_wq_awake_rx(struct work_struct *work) struct qca_data *qca = container_of(work, struct qca_data, ws_awake_rx); struct hci_uart *hu = qca->hu; + unsigned long flags; BT_DBG("hu %p wq awake rx", hu); serial_clock_vote(HCI_IBS_RX_VOTE_CLOCK_ON, hu); - spin_lock(&qca->hci_ibs_lock); + spin_lock_irqsave(&qca->hci_ibs_lock, flags); qca->rx_ibs_state = HCI_IBS_RX_AWAKE; /* Always acknowledge device wake up, @@ -354,7 +356,7 @@ static void qca_wq_awake_rx(struct work_struct *work) qca->ibs_sent_wacks++; - spin_unlock(&qca->hci_ibs_lock); + spin_unlock_irqrestore(&qca->hci_ibs_lock, flags); /* Actually send the packets */ hci_uart_tx_wakeup(hu); From d94dfd798c4839b642f534580109dc6dfc3901a9 Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sat, 31 Aug 2019 14:23:40 -0500 Subject: [PATCH 067/169] Bluetooth: bpa10x: change return value When returning from bpa10x_send_frame, it is necessary to propagate any potential errno returned from usb_submit_urb. Signed-off-by: Navid Emamdoost Signed-off-by: Marcel Holtmann --- drivers/bluetooth/bpa10x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index a0e84538cec8..1fa58c059cbf 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -337,7 +337,7 @@ static int bpa10x_send_frame(struct hci_dev *hdev, struct sk_buff *skb) usb_free_urb(urb); - return 0; + return err; } static int bpa10x_set_diag(struct hci_dev *hdev, bool enable) From 1251201c0d34fadf69d56efa675c2b7dd0a90eca Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 4 Sep 2019 09:55:32 +0200 Subject: [PATCH 068/169] sched/core: Fix uclamp ABI bug, clean up and robustify sched_read_attr() ABI logic and code Thadeu Lima de Souza Cascardo reported that 'chrt' broke on recent kernels: $ chrt -p $$ chrt: failed to get pid 26306's policy: Argument list too long and he has root-caused the bug to the following commit increasing sched_attr size and breaking sched_read_attr() into returning -EFBIG: a509a7cd7974 ("sched/uclamp: Extend sched_setattr() to support utilization clamping") The other, bigger bug is that the whole sched_getattr() and sched_read_attr() logic of checking non-zero bits in new ABI components is arguably broken, and pretty much any extension of the ABI will spuriously break the ABI. That's way too fragile. Instead implement the perf syscall's extensible ABI instead, which we already implement on the sched_setattr() side: - if user-attributes have the same size as kernel attributes then the logic is unchanged. - if user-attributes are larger than the kernel knows about then simply skip the extra bits, but set attr->size to the (smaller) kernel size so that tooling can (in principle) handle older kernel as well. - if user-attributes are smaller than the kernel knows about then just copy whatever user-space can accept. Also clean up the whole logic: - Simplify the code flow - there's no need for 'ret' for example. - Standardize on 'kattr/uattr' and 'ksize/usize' naming to make sure we always know which side we are dealing with. - Why is it called 'read' when what it does is to copy to user? This code is so far away from VFS read() semantics that the naming is actively confusing. Name it sched_attr_copy_to_user() instead, which mirrors other copy_to_user() functionality. - Move the attr->size assignment from the head of sched_getattr() to the sched_attr_copy_to_user() function. Nothing else within the kernel should care about the size of the structure. With these fixes the sched_getattr() syscall now nicely supports an extensible ABI in both a forward and backward compatible fashion, and will also fix the chrt bug. As an added bonus the bogus -EFBIG return is removed as well, which as Thadeu noted should have been -E2BIG to begin with. Reported-by: Thadeu Lima de Souza Cascardo Tested-by: Dietmar Eggemann Tested-by: Thadeu Lima de Souza Cascardo Acked-by: Thadeu Lima de Souza Cascardo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Patrick Bellasi Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: a509a7cd7974 ("sched/uclamp: Extend sched_setattr() to support utilization clamping") Link: https://lkml.kernel.org/r/20190904075532.GA26751@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 78 ++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 010d578118d6..df9f1fe5689b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5105,37 +5105,40 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) return retval; } -static int sched_read_attr(struct sched_attr __user *uattr, - struct sched_attr *attr, - unsigned int usize) +/* + * Copy the kernel size attribute structure (which might be larger + * than what user-space knows about) to user-space. + * + * Note that all cases are valid: user-space buffer can be larger or + * smaller than the kernel-space buffer. The usual case is that both + * have the same size. + */ +static int +sched_attr_copy_to_user(struct sched_attr __user *uattr, + struct sched_attr *kattr, + unsigned int usize) { - int ret; + unsigned int ksize = sizeof(*kattr); if (!access_ok(uattr, usize)) return -EFAULT; /* - * If we're handed a smaller struct than we know of, - * ensure all the unknown bits are 0 - i.e. old - * user-space does not get uncomplete information. + * sched_getattr() ABI forwards and backwards compatibility: + * + * If usize == ksize then we just copy everything to user-space and all is good. + * + * If usize < ksize then we only copy as much as user-space has space for, + * this keeps ABI compatibility as well. We skip the rest. + * + * If usize > ksize then user-space is using a newer version of the ABI, + * which part the kernel doesn't know about. Just ignore it - tooling can + * detect the kernel's knowledge of attributes from the attr->size value + * which is set to ksize in this case. */ - if (usize < sizeof(*attr)) { - unsigned char *addr; - unsigned char *end; + kattr->size = min(usize, ksize); - addr = (void *)attr + usize; - end = (void *)attr + sizeof(*attr); - - for (; addr < end; addr++) { - if (*addr) - return -EFBIG; - } - - attr->size = usize; - } - - ret = copy_to_user(uattr, attr, attr->size); - if (ret) + if (copy_to_user(uattr, kattr, kattr->size)) return -EFAULT; return 0; @@ -5145,20 +5148,18 @@ static int sched_read_attr(struct sched_attr __user *uattr, * sys_sched_getattr - similar to sched_getparam, but with sched_attr * @pid: the pid in question. * @uattr: structure containing the extended parameters. - * @size: sizeof(attr) for fwd/bwd comp. + * @usize: sizeof(attr) that user-space knows about, for forwards and backwards compatibility. * @flags: for future extension. */ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, - unsigned int, size, unsigned int, flags) + unsigned int, usize, unsigned int, flags) { - struct sched_attr attr = { - .size = sizeof(struct sched_attr), - }; + struct sched_attr kattr = { }; struct task_struct *p; int retval; - if (!uattr || pid < 0 || size > PAGE_SIZE || - size < SCHED_ATTR_SIZE_VER0 || flags) + if (!uattr || pid < 0 || usize > PAGE_SIZE || + usize < SCHED_ATTR_SIZE_VER0 || flags) return -EINVAL; rcu_read_lock(); @@ -5171,25 +5172,24 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, if (retval) goto out_unlock; - attr.sched_policy = p->policy; + kattr.sched_policy = p->policy; if (p->sched_reset_on_fork) - attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; + kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; if (task_has_dl_policy(p)) - __getparam_dl(p, &attr); + __getparam_dl(p, &kattr); else if (task_has_rt_policy(p)) - attr.sched_priority = p->rt_priority; + kattr.sched_priority = p->rt_priority; else - attr.sched_nice = task_nice(p); + kattr.sched_nice = task_nice(p); #ifdef CONFIG_UCLAMP_TASK - attr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; - attr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; + kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; + kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; #endif rcu_read_unlock(); - retval = sched_read_attr(uattr, &attr, size); - return retval; + return sched_attr_copy_to_user(uattr, &kattr, usize); out_unlock: rcu_read_unlock(); From b0841eefd9693827afb9888235e26ddd098f9cef Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 31 Aug 2019 09:43:43 +0200 Subject: [PATCH 069/169] configfs: provide exclusion between IO and removals Make sure that attribute methods are not called after the item has been removed from the tree. To do so, we * at the point of no return in removals, grab ->frag_sem exclusive and mark the fragment dead. * call the methods of attributes with ->frag_sem taken shared and only after having verified that the fragment is still alive. The main benefit is for method instances - they are guaranteed that the objects they are accessing *and* all ancestors are still there. Another win is that we don't need to bother with extra refcount on config_item when opening a file - the item will be alive for as long as it stays in the tree, and we won't touch it/attributes/any associated data after it's been removed from the tree. Signed-off-by: Al Viro Signed-off-by: Christoph Hellwig --- fs/configfs/dir.c | 23 ++++++++++++++ fs/configfs/file.c | 77 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 81 insertions(+), 19 deletions(-) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 1df65c2440da..79fc25aaa8cd 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1461,6 +1461,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) struct config_item *item; struct configfs_subsystem *subsys; struct configfs_dirent *sd; + struct configfs_fragment *frag; struct module *subsys_owner = NULL, *dead_item_owner = NULL; int ret; @@ -1518,6 +1519,16 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) } } while (ret == -EAGAIN); + frag = sd->s_frag; + if (down_write_killable(&frag->frag_sem)) { + spin_lock(&configfs_dirent_lock); + configfs_detach_rollback(dentry); + spin_unlock(&configfs_dirent_lock); + return -EINTR; + } + frag->frag_dead = true; + up_write(&frag->frag_sem); + /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); @@ -1821,6 +1832,12 @@ void configfs_unregister_group(struct config_group *group) struct configfs_subsystem *subsys = group->cg_subsys; struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *parent = group->cg_item.ci_parent->ci_dentry; + struct configfs_dirent *sd = dentry->d_fsdata; + struct configfs_fragment *frag = sd->s_frag; + + down_write(&frag->frag_sem); + frag->frag_dead = true; + up_write(&frag->frag_sem); inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); spin_lock(&configfs_dirent_lock); @@ -1947,12 +1964,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) struct config_group *group = &subsys->su_group; struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *root = dentry->d_sb->s_root; + struct configfs_dirent *sd = dentry->d_fsdata; + struct configfs_fragment *frag = sd->s_frag; if (dentry->d_parent != root) { pr_err("Tried to unregister non-subsystem!\n"); return; } + down_write(&frag->frag_sem); + frag->frag_dead = true; + up_write(&frag->frag_sem); + inode_lock_nested(d_inode(root), I_MUTEX_PARENT); inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD); diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 9d5e888df60b..fb65b706cc0d 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -48,22 +48,32 @@ struct configfs_buffer { }; }; - -static int fill_read_buffer(struct configfs_buffer * buffer) +static inline struct configfs_fragment *to_frag(struct file *file) { - ssize_t count; + struct configfs_dirent *sd = file->f_path.dentry->d_fsdata; + + return sd->s_frag; +} + +static int fill_read_buffer(struct file *file, struct configfs_buffer *buffer) +{ + struct configfs_fragment *frag = to_frag(file); + ssize_t count = -ENOENT; if (!buffer->page) buffer->page = (char *) get_zeroed_page(GFP_KERNEL); if (!buffer->page) return -ENOMEM; - count = buffer->attr->show(buffer->item, buffer->page); + down_read(&frag->frag_sem); + if (!frag->frag_dead) + count = buffer->attr->show(buffer->item, buffer->page); + up_read(&frag->frag_sem); + if (count < 0) return count; if (WARN_ON_ONCE(count > (ssize_t)SIMPLE_ATTR_SIZE)) return -EIO; - buffer->needs_read_fill = 0; buffer->count = count; return 0; @@ -96,7 +106,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp mutex_lock(&buffer->mutex); if (buffer->needs_read_fill) { - retval = fill_read_buffer(buffer); + retval = fill_read_buffer(file, buffer); if (retval) goto out; } @@ -133,6 +143,7 @@ static ssize_t configfs_read_bin_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + struct configfs_fragment *frag = to_frag(file); struct configfs_buffer *buffer = file->private_data; ssize_t retval = 0; ssize_t len = min_t(size_t, count, PAGE_SIZE); @@ -148,7 +159,12 @@ configfs_read_bin_file(struct file *file, char __user *buf, if (buffer->needs_read_fill) { /* perform first read with buf == NULL to get extent */ - len = buffer->bin_attr->read(buffer->item, NULL, 0); + down_read(&frag->frag_sem); + if (!frag->frag_dead) + len = buffer->bin_attr->read(buffer->item, NULL, 0); + else + len = -ENOENT; + up_read(&frag->frag_sem); if (len <= 0) { retval = len; goto out; @@ -168,8 +184,13 @@ configfs_read_bin_file(struct file *file, char __user *buf, buffer->bin_buffer_size = len; /* perform second read to fill buffer */ - len = buffer->bin_attr->read(buffer->item, - buffer->bin_buffer, len); + down_read(&frag->frag_sem); + if (!frag->frag_dead) + len = buffer->bin_attr->read(buffer->item, + buffer->bin_buffer, len); + else + len = -ENOENT; + up_read(&frag->frag_sem); if (len < 0) { retval = len; vfree(buffer->bin_buffer); @@ -220,9 +241,16 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size } static int -flush_write_buffer(struct configfs_buffer *buffer, size_t count) +flush_write_buffer(struct file *file, struct configfs_buffer *buffer, size_t count) { - return buffer->attr->store(buffer->item, buffer->page, count); + struct configfs_fragment *frag = to_frag(file); + int res = -ENOENT; + + down_read(&frag->frag_sem); + if (!frag->frag_dead) + res = buffer->attr->store(buffer->item, buffer->page, count); + up_read(&frag->frag_sem); + return res; } @@ -252,7 +280,7 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof mutex_lock(&buffer->mutex); len = fill_write_buffer(buffer, buf, count); if (len > 0) - len = flush_write_buffer(buffer, len); + len = flush_write_buffer(file, buffer, len); if (len > 0) *ppos += len; mutex_unlock(&buffer->mutex); @@ -328,6 +356,7 @@ configfs_write_bin_file(struct file *file, const char __user *buf, static int __configfs_open_file(struct inode *inode, struct file *file, int type) { struct dentry *dentry = file->f_path.dentry; + struct configfs_fragment *frag = to_frag(file); struct configfs_attribute *attr; struct configfs_buffer *buffer; int error; @@ -337,8 +366,13 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type if (!buffer) goto out; + error = -ENOENT; + down_read(&frag->frag_sem); + if (unlikely(frag->frag_dead)) + goto out_free_buffer; + error = -EINVAL; - buffer->item = configfs_get_config_item(dentry->d_parent); + buffer->item = to_item(dentry->d_parent); if (!buffer->item) goto out_free_buffer; @@ -396,6 +430,7 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type buffer->read_in_progress = false; buffer->write_in_progress = false; file->private_data = buffer; + up_read(&frag->frag_sem); return 0; out_put_module: @@ -403,6 +438,7 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type out_put_item: config_item_put(buffer->item); out_free_buffer: + up_read(&frag->frag_sem); kfree(buffer); out: return error; @@ -412,8 +448,6 @@ static int configfs_release(struct inode *inode, struct file *filp) { struct configfs_buffer *buffer = filp->private_data; - if (buffer->item) - config_item_put(buffer->item); module_put(buffer->owner); if (buffer->page) free_page((unsigned long)buffer->page); @@ -439,12 +473,17 @@ static int configfs_release_bin_file(struct inode *inode, struct file *file) buffer->read_in_progress = false; if (buffer->write_in_progress) { + struct configfs_fragment *frag = to_frag(file); buffer->write_in_progress = false; - /* result of ->release() is ignored */ - buffer->bin_attr->write(buffer->item, buffer->bin_buffer, - buffer->bin_buffer_size); - + down_read(&frag->frag_sem); + if (!frag->frag_dead) { + /* result of ->release() is ignored */ + buffer->bin_attr->write(buffer->item, + buffer->bin_buffer, + buffer->bin_buffer_size); + } + up_read(&frag->frag_sem); /* vfree on NULL is safe */ vfree(buffer->bin_buffer); buffer->bin_buffer = NULL; From 591328948ba60423e28d000da99d34f7586de54a Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 2 Sep 2019 11:34:08 +0100 Subject: [PATCH 070/169] rxrpc: Fix misplaced traceline There's a misplaced traceline in rxrpc_input_packet() which is looking at a packet that just got released rather than the replacement packet. Fix this by moving the traceline after the assignment that moves the new packet pointer to the actual packet pointer. Fixes: d0d5c0cd1e71 ("rxrpc: Use skb_unshare() rather than skb_cow_data()") Reported-by: Hillf Danton Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index d122c53c8697..157be1ff8697 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1262,8 +1262,8 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) if (nskb != skb) { rxrpc_eaten_skb(skb, rxrpc_skb_received); - rxrpc_new_skb(skb, rxrpc_skb_unshared); skb = nskb; + rxrpc_new_skb(skb, rxrpc_skb_unshared); sp = rxrpc_skb(skb); } } From 10eb56c582c557c629271f1ee31e15e7a9b2558b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 2 Sep 2019 23:24:21 +0800 Subject: [PATCH 071/169] sctp: use transport pf_retrans in sctp_do_8_2_transport_strike Transport should use its own pf_retrans to do the error_count check, instead of asoc's. Otherwise, it's meaningless to make pf_retrans per transport. Fixes: 5aa93bcf66f4 ("sctp: Implement quick failover draft from tsvwg") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 1cf5bb5b73c4..e52b2128e43b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -547,7 +547,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands, if (net->sctp.pf_enable && (transport->state == SCTP_ACTIVE) && (transport->error_count < transport->pathmaxrxt) && - (transport->error_count > asoc->pf_retrans)) { + (transport->error_count > transport->pf_retrans)) { sctp_assoc_control_transport(asoc, transport, SCTP_TRANSPORT_PF, From d55a2e374a94fa34a3048c6a2be535266e506d97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Mon, 2 Sep 2019 09:23:36 -0700 Subject: [PATCH 072/169] net-ipv6: fix excessive RTF_ADDRCONF flag on ::1/128 local route (and others) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a subtle change in behaviour introduced by: commit c7a1ce397adacaf5d4bb2eab0a738b5f80dc3e43 'ipv6: Change addrconf_f6i_alloc to use ip6_route_info_create' Before that patch /proc/net/ipv6_route includes: 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo Afterwards /proc/net/ipv6_route includes: 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80240001 lo ie. the above commit causes the ::1/128 local (automatic) route to be flagged with RTF_ADDRCONF (0x040000). AFAICT, this is incorrect since these routes are *not* coming from RA's. As such, this patch restores the old behaviour. Fixes: c7a1ce397ada ("ipv6: Change addrconf_f6i_alloc to use ip6_route_info_create") Cc: David Ahern Cc: Lorenzo Colitti Signed-off-by: Maciej Żenczykowski Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fd059e08785a..2927e72e511a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4388,13 +4388,14 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, struct fib6_config cfg = { .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL, .fc_ifindex = idev->dev->ifindex, - .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP, + .fc_flags = RTF_UP | RTF_NONEXTHOP, .fc_dst = *addr, .fc_dst_len = 128, .fc_protocol = RTPROT_KERNEL, .fc_nlinfo.nl_net = net, .fc_ignore_dev_down = true, }; + struct fib6_info *f6i; if (anycast) { cfg.fc_type = RTN_ANYCAST; @@ -4404,7 +4405,10 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, cfg.fc_flags |= RTF_LOCAL; } - return ip6_route_info_create(&cfg, gfp_flags, NULL); + f6i = ip6_route_info_create(&cfg, gfp_flags, NULL); + if (f6i) + f6i->dst_nocount = true; + return f6i; } /* remove deleted ip from prefsrc entries */ From 02e740aeca79e9a3c464bc2ebb510457c85d2015 Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Wed, 4 Sep 2019 19:17:02 +0100 Subject: [PATCH 073/169] Documentation/process: Volunteer as the ambassador for Xen Cc: Jonathan Corbet Cc: Thomas Gleixner Cc: Tyler Hicks Cc: Ben Hutchings Cc: Konrad Rzeszutek Wilk Cc: Josh Poimboeuf Cc: Jiri Kosina Signed-off-by: Andrew Cooper Link: https://lore.kernel.org/r/20190904181702.19788-1-andrew.cooper3@citrix.com Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index d37cbc502936..f9e8c0b23c52 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -221,7 +221,7 @@ an involved disclosed party. The current ambassadors list: Microsoft VMware - XEN + Xen Andrew Cooper Canonical Tyler Hicks Debian Ben Hutchings From f56f791f6d8e8296c9dd194506ee2b703b1227d4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Sep 2019 09:24:49 -0700 Subject: [PATCH 074/169] Documentation/process: Add Google contact for embargoed hardware issues This adds myself as the Google contact for embargoed hardware security issues and fixes some small typos. Cc: Thomas Gleixner Cc: Matt Linton Cc: Matthew Garrett Signed-off-by: Kees Cook Acked-by: Guenter Roeck Link: https://lore.kernel.org/r/201909040922.56496BF70@keescook Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index f9e8c0b23c52..47316d9c24f5 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -5,7 +5,7 @@ Scope ----- Hardware issues which result in security problems are a different category -of security bugs than pure software bugs which only affect the Linux +of security bugs than pure software bugs which only affect the Linux kernel. Hardware issues like Meltdown, Spectre, L1TF etc. must be treated @@ -159,7 +159,7 @@ Mitigation development The initial response team sets up an encrypted mailing-list or repurposes an existing one if appropriate. The disclosing party should provide a list -of contacts for all other parties who have already been, or should be +of contacts for all other parties who have already been, or should be, informed about the issue. The response team contacts these parties so they can name experts who should be subscribed to the mailing-list. @@ -230,8 +230,8 @@ an involved disclosed party. The current ambassadors list: SUSE Jiri Kosina Amazon - Google - ============== ======================================================== + Google Kees Cook + ============= ======================================================== If you want your organization to be added to the ambassadors list, please contact the hardware security team. The nominated ambassador has to From 68d19d7d995759b96169da5aac313363f92a9075 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 4 Sep 2019 20:13:08 +0200 Subject: [PATCH 075/169] Revert "Bluetooth: validate BLE connection interval updates" This reverts commit c49a8682fc5d298d44e8d911f4fa14690ea9485e. There are devices which require low connection intervals for usable operation including keyboards and mice. Forcing a static connection interval for these types of devices has an impact in latency and causes a regression. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- net/bluetooth/hci_event.c | 5 ----- net/bluetooth/l2cap_core.c | 9 +-------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index cdb00c2ef242..c1d3a303d97f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5660,11 +5660,6 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_UNKNOWN_CONN_ID); - if (min < hcon->le_conn_min_interval || - max > hcon->le_conn_max_interval) - return send_conn_param_neg_reply(hdev, handle, - HCI_ERROR_INVALID_LL_PARAMS); - if (hci_check_conn_params(min, max, latency, timeout)) return send_conn_param_neg_reply(hdev, handle, HCI_ERROR_INVALID_LL_PARAMS); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index dfc1edb168b7..da7fdbdf9c41 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5305,14 +5305,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - if (min < hcon->le_conn_min_interval || - max > hcon->le_conn_max_interval) { - BT_DBG("requested connection interval exceeds current bounds."); - err = -EINVAL; - } else { - err = hci_check_conn_params(min, max, latency, to_multiplier); - } - + err = hci_check_conn_params(min, max, latency, to_multiplier); if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else From 42dec1dbe38239cf91cc1f4df7830c66276ced37 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 3 Sep 2019 17:53:12 +0800 Subject: [PATCH 076/169] tipc: add NULL pointer check before calling kfree_rcu Unlike kfree(p), kfree_rcu(p, rcu) won't do NULL pointer check. When tipc_nametbl_remove_publ returns NULL, the panic below happens: BUG: unable to handle kernel NULL pointer dereference at 0000000000000068 RIP: 0010:__call_rcu+0x1d/0x290 Call Trace: tipc_publ_notify+0xa9/0x170 [tipc] tipc_node_write_unlock+0x8d/0x100 [tipc] tipc_node_link_down+0xae/0x1d0 [tipc] tipc_node_check_dest+0x3ea/0x8f0 [tipc] ? tipc_disc_rcv+0x2c7/0x430 [tipc] tipc_disc_rcv+0x2c7/0x430 [tipc] ? tipc_rcv+0x6bb/0xf20 [tipc] tipc_rcv+0x6bb/0xf20 [tipc] ? ip_route_input_slow+0x9cf/0xb10 tipc_udp_recv+0x195/0x1e0 [tipc] ? tipc_udp_is_known_peer+0x80/0x80 [tipc] udp_queue_rcv_skb+0x180/0x460 udp_unicast_rcv_skb.isra.56+0x75/0x90 __udp4_lib_rcv+0x4ce/0xb90 ip_local_deliver_finish+0x11c/0x210 ip_local_deliver+0x6b/0xe0 ? ip_rcv_finish+0xa9/0x410 ip_rcv+0x273/0x362 Fixes: 97ede29e80ee ("tipc: convert name table read-write lock to RCU") Reported-by: Li Shuang Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/tipc/name_distr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 44abc8e9c990..241ed2274473 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -223,7 +223,8 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) publ->key); } - kfree_rcu(p, rcu); + if (p) + kfree_rcu(p, rcu); } /** From ebe26aca98fcf9fbe5017b5cbe216413cee69df5 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Tue, 3 Sep 2019 11:46:52 -0700 Subject: [PATCH 077/169] net: fixed_phy: Add forward declaration for struct gpio_desc; Add forward declaration for struct gpio_desc in order to address the following: ./include/linux/phy_fixed.h:48:17: error: 'struct gpio_desc' declared inside parameter list [-Werror] ./include/linux/phy_fixed.h:48:17: error: its scope is only this definition or declaration, which is probably not what you want [-Werror] Fixes: 71bd106d2567 ("net: fixed-phy: Add fixed_phy_register_with_gpiod() API") Signed-off-by: Moritz Fischer Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy_fixed.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 1e5d86ebdaeb..52bc8e487ef7 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -11,6 +11,7 @@ struct fixed_phy_status { }; struct device_node; +struct gpio_desc; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); From 44580a0118d3ede95fec4dce32df5f75f73cd663 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 3 Sep 2019 13:24:50 -0700 Subject: [PATCH 078/169] net: sock_map, fix missing ulp check in sock hash case sock_map and ULP only work together when ULP is loaded after the sock map is loaded. In the sock_map case we added a check for this to fail the load if ULP is already set. However, we missed the check on the sock_hash side. Add a ULP check to the sock_hash update path. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Reported-by: syzbot+7a6ee4d0078eac6bf782@syzkaller.appspotmail.com Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/core/sock_map.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 1330a7442e5b..50916f9bc4f2 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -656,6 +656,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, struct sock *sk, u64 flags) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct inet_connection_sock *icsk = inet_csk(sk); u32 key_size = map->key_size, hash; struct bpf_htab_elem *elem, *elem_new; struct bpf_htab_bucket *bucket; @@ -666,6 +667,8 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, WARN_ON_ONCE(!rcu_read_lock_held()); if (unlikely(flags > BPF_EXIST)) return -EINVAL; + if (unlikely(icsk->icsk_ulp_data)) + return -EINVAL; link = sk_psock_init_link(); if (!link) From 4255ff0544ee307e9a8acf66000e6fd49e9203f8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 3 Sep 2019 15:22:12 -0700 Subject: [PATCH 079/169] ipv6: Fix RTA_MULTIPATH with nexthop objects A change to the core nla helpers was missed during the push of the nexthop changes. rt6_fill_node_nexthop should be calling nla_nest_start_noflag not nla_nest_start. Currently, iproute2 does not print multipath data because of parsing issues with the attribute. Fixes: f88d8ea67fbd ("ipv6: Plumb support for nexthop object in a fib6_info") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2927e72e511a..977cfd09e2a3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5329,7 +5329,7 @@ static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh, if (nexthop_is_multipath(nh)) { struct nlattr *mp; - mp = nla_nest_start(skb, RTA_MULTIPATH); + mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure; From 91bfb564853f4aaa0487a2b5e9c6ecd400768abd Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 3 Sep 2019 15:22:13 -0700 Subject: [PATCH 080/169] selftest: A few cleanups for fib_nexthops.sh Cleanups of the tests in fib_nexthops.sh 1. Several tests noted unexpected route output, but the discrepancy was not showing in the summary output and overlooked in the verbose output. Add a WARNING message to the summary output to make it clear a test is not showing expected output. 2. Several check_* calls are missing extra data like scope and metric causing mismatches when the nexthops or routes are correct - some of them are a side effect of the evolving iproute2 command. Update the data to the expected output. 3. Several check_routes are checking for the wrong nexthop data, most likely a copy-paste-update error. 4. A couple of tests were re-using a nexthop id that already existed. Fix those to use a new id. Fixes: 6345266a9989 ("selftests: Add test cases for nexthop objects") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_nexthops.sh | 24 +++++++++++---------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index c5c93d5fb3ad..f9ebeac1e6f2 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -212,6 +212,8 @@ check_output() printf " ${out}\n" printf " Expected:\n" printf " ${expected}\n\n" + else + echo " WARNING: Unexpected route entry" fi fi @@ -274,7 +276,7 @@ ipv6_fcnal() run_cmd "$IP nexthop get id 52" log_test $? 0 "Get nexthop by id" - check_nexthop "id 52" "id 52 via 2001:db8:91::2 dev veth1" + check_nexthop "id 52" "id 52 via 2001:db8:91::2 dev veth1 scope link" run_cmd "$IP nexthop del id 52" log_test $? 0 "Delete nexthop by id" @@ -479,12 +481,12 @@ ipv6_fcnal_runtime() run_cmd "$IP -6 nexthop add id 85 dev veth1" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 85" log_test $? 0 "IPv6 route with device only nexthop" - check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1" + check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024 pref medium" run_cmd "$IP nexthop add id 123 group 81/85" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 123" log_test $? 0 "IPv6 multipath route with nexthop mix - dev only + gw" - check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 nexthop via 2001:db8:91::2 dev veth1 nexthop dev veth1" + check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1 pref medium" # # IPv6 route with v4 nexthop - not allowed @@ -538,7 +540,7 @@ ipv4_fcnal() run_cmd "$IP nexthop get id 12" log_test $? 0 "Get nexthop by id" - check_nexthop "id 12" "id 12 via 172.16.1.2 src 172.16.1.1 dev veth1 scope link" + check_nexthop "id 12" "id 12 via 172.16.1.2 dev veth1 scope link" run_cmd "$IP nexthop del id 12" log_test $? 0 "Delete nexthop by id" @@ -685,7 +687,7 @@ ipv4_withv6_fcnal() set +e run_cmd "$IP ro add 172.16.101.1/32 nhid 11" log_test $? 0 "IPv6 nexthop with IPv4 route" - check_route "172.16.101.1" "172.16.101.1 nhid 11 via ${lladdr} dev veth1" + check_route "172.16.101.1" "172.16.101.1 nhid 11 via inet6 ${lladdr} dev veth1" set -e run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1" @@ -694,11 +696,11 @@ ipv4_withv6_fcnal() run_cmd "$IP ro replace 172.16.101.1/32 nhid 101" log_test $? 0 "IPv6 nexthop with IPv4 route" - check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" + check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1" log_test $? 0 "IPv4 route with IPv6 gateway" - check_route "172.16.101.1" "172.16.101.1 via ${lladdr} dev veth1" + check_route "172.16.101.1" "172.16.101.1 via inet6 ${lladdr} dev veth1" run_cmd "$IP ro replace 172.16.101.1/32 via inet6 2001:db8:50::1 dev veth1" log_test $? 2 "IPv4 route with invalid IPv6 gateway" @@ -785,10 +787,10 @@ ipv4_fcnal_runtime() log_test $? 0 "IPv4 route with device only nexthop" check_route "172.16.101.1" "172.16.101.1 nhid 85 dev veth1" - run_cmd "$IP nexthop add id 122 group 21/85" - run_cmd "$IP ro replace 172.16.101.1/32 nhid 122" + run_cmd "$IP nexthop add id 123 group 21/85" + run_cmd "$IP ro replace 172.16.101.1/32 nhid 123" log_test $? 0 "IPv4 multipath route with nexthop mix - dev only + gw" - check_route "172.16.101.1" "172.16.101.1 nhid 85 nexthop via 172.16.1.2 dev veth1 nexthop dev veth1" + check_route "172.16.101.1" "172.16.101.1 nhid 123 nexthop via 172.16.1.2 dev veth1 weight 1 nexthop dev veth1 weight 1" # # IPv4 with IPv6 @@ -820,7 +822,7 @@ ipv4_fcnal_runtime() run_cmd "$IP ro replace 172.16.101.1/32 nhid 101" log_test $? 0 "IPv4 route with mixed v4-v6 multipath route" - check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" + check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" log_test $? 0 "IPv6 nexthop with IPv4 route" From 7bdf4de1267780aa194b3a28c85a6c4d617b0bdb Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Wed, 4 Sep 2019 10:11:58 -0400 Subject: [PATCH 081/169] net: Properly update v4 routes with v6 nexthop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When creating a v4 route that uses a v6 nexthop from a nexthop group. Allow the kernel to properly send the nexthop as v6 via the RTA_VIA attribute. Broken behavior: $ ip nexthop add via fe80::9 dev eth0 $ ip nexthop show id 1 via fe80::9 dev eth0 scope link $ ip route add 4.5.6.7/32 nhid 1 $ ip route show default via 10.0.2.2 dev eth0 4.5.6.7 nhid 1 via 254.128.0.0 dev eth0 10.0.2.0/24 dev eth0 proto kernel scope link src 10.0.2.15 $ Fixed behavior: $ ip nexthop add via fe80::9 dev eth0 $ ip nexthop show id 1 via fe80::9 dev eth0 scope link $ ip route add 4.5.6.7/32 nhid 1 $ ip route show default via 10.0.2.2 dev eth0 4.5.6.7 nhid 1 via inet6 fe80::9 dev eth0 10.0.2.0/24 dev eth0 proto kernel scope link src 10.0.2.15 $ v2, v3: Addresses code review comments from David Ahern Fixes: dcb1ecb50edf (“ipv4: Prepare for fib6_nh from a nexthop object”) Signed-off-by: Donald Sharp Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 4 ++-- include/net/nexthop.h | 5 +++-- net/ipv4/fib_semantics.c | 15 ++++++++------- net/ipv6/route.c | 11 ++++++----- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 4c81846ccce8..ab1ca9e238d2 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -513,7 +513,7 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct netlink_callback *cb); int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, - unsigned char *flags, bool skip_oif); + u8 rt_family, unsigned char *flags, bool skip_oif); int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, - int nh_weight); + int nh_weight, u8 rt_family); #endif /* _NET_FIB_H */ diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 95f766c31c90..331ebbc94fe7 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -161,7 +161,8 @@ struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel) } static inline -int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh) +int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, + u8 rt_family) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); int i; @@ -172,7 +173,7 @@ int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh) struct fib_nh_common *nhc = &nhi->fib_nhc; int weight = nhg->nh_entries[i].weight; - if (fib_add_nexthop(skb, nhc, weight) < 0) + if (fib_add_nexthop(skb, nhc, weight, rt_family) < 0) return -EMSGSIZE; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2db089e10ba0..0913a090b2bf 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1582,7 +1582,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, } int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, - unsigned char *flags, bool skip_oif) + u8 rt_family, unsigned char *flags, bool skip_oif) { if (nhc->nhc_flags & RTNH_F_DEAD) *flags |= RTNH_F_DEAD; @@ -1613,7 +1613,7 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, /* if gateway family does not match nexthop family * gateway is encoded as RTA_VIA */ - if (nhc->nhc_gw_family != nhc->nhc_family) { + if (rt_family != nhc->nhc_gw_family) { int alen = sizeof(struct in6_addr); struct nlattr *nla; struct rtvia *via; @@ -1654,7 +1654,7 @@ EXPORT_SYMBOL_GPL(fib_nexthop_info); #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6) int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, - int nh_weight) + int nh_weight, u8 rt_family) { const struct net_device *dev = nhc->nhc_dev; struct rtnexthop *rtnh; @@ -1667,7 +1667,7 @@ int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, rtnh->rtnh_hops = nh_weight - 1; rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; - if (fib_nexthop_info(skb, nhc, &flags, true) < 0) + if (fib_nexthop_info(skb, nhc, rt_family, &flags, true) < 0) goto nla_put_failure; rtnh->rtnh_flags = flags; @@ -1693,13 +1693,14 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) goto nla_put_failure; if (unlikely(fi->nh)) { - if (nexthop_mpath_fill_node(skb, fi->nh) < 0) + if (nexthop_mpath_fill_node(skb, fi->nh, AF_INET) < 0) goto nla_put_failure; goto mp_end; } for_nexthops(fi) { - if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0) + if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight, + AF_INET) < 0) goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID if (nh->nh_tclassid && @@ -1775,7 +1776,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); unsigned char flags = 0; - if (fib_nexthop_info(skb, nhc, &flags, false) < 0) + if (fib_nexthop_info(skb, nhc, AF_INET, &flags, false) < 0) goto nla_put_failure; rtm->rtm_flags = flags; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 977cfd09e2a3..85bcd97f7a72 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5333,7 +5333,7 @@ static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh, if (!mp) goto nla_put_failure; - if (nexthop_mpath_fill_node(skb, nh)) + if (nexthop_mpath_fill_node(skb, nh, AF_INET6)) goto nla_put_failure; nla_nest_end(skb, mp); @@ -5341,7 +5341,7 @@ static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh, struct fib6_nh *fib6_nh; fib6_nh = nexthop_fib6_nh(nh); - if (fib_nexthop_info(skb, &fib6_nh->nh_common, + if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6, flags, false) < 0) goto nla_put_failure; } @@ -5470,13 +5470,14 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common, - rt->fib6_nh->fib_nh_weight) < 0) + rt->fib6_nh->fib_nh_weight, AF_INET6) < 0) goto nla_put_failure; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, - sibling->fib6_nh->fib_nh_weight) < 0) + sibling->fib6_nh->fib_nh_weight, + AF_INET6) < 0) goto nla_put_failure; } @@ -5493,7 +5494,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, rtm->rtm_flags |= nh_flags; } else { - if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, + if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6, &nh_flags, false) < 0) goto nla_put_failure; From 2339cd6cd0b5401fa3fe886bf1c0cb8822041957 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 3 Sep 2019 15:16:17 -0700 Subject: [PATCH 082/169] bpf: fix precision tracking of stack slots The problem can be seen in the following two tests: 0: (bf) r3 = r10 1: (55) if r3 != 0x7b goto pc+0 2: (7a) *(u64 *)(r3 -8) = 0 3: (79) r4 = *(u64 *)(r10 -8) .. 0: (85) call bpf_get_prandom_u32#7 1: (bf) r3 = r10 2: (55) if r3 != 0x7b goto pc+0 3: (7b) *(u64 *)(r3 -8) = r0 4: (79) r4 = *(u64 *)(r10 -8) When backtracking need to mark R4 it will mark slot fp-8. But ST or STX into fp-8 could belong to the same block of instructions. When backtracing is done the parent state may have fp-8 slot as "unallocated stack". Which will cause verifier to warn and incorrectly reject such programs. Writes into stack via non-R10 register are rare. llvm always generates canonical stack spill/fill. For such pathological case fall back to conservative precision tracking instead of rejecting. Reported-by: syzbot+c8d66267fd2b5955287e@syzkaller.appspotmail.com Fixes: b5dc0163d8fd ("bpf: precise scalar_value tracking") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b5c14c9d7b98..c36a719fee6d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1772,16 +1772,21 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, bitmap_from_u64(mask, stack_mask); for_each_set_bit(i, mask, 64) { if (i >= func->allocated_stack / BPF_REG_SIZE) { - /* This can happen if backtracking - * is propagating stack precision where - * caller has larger stack frame - * than callee, but backtrack_insn() should - * have returned -ENOTSUPP. + /* the sequence of instructions: + * 2: (bf) r3 = r10 + * 3: (7b) *(u64 *)(r3 -8) = r0 + * 4: (79) r4 = *(u64 *)(r10 -8) + * doesn't contain jmps. It's backtracked + * as a single block. + * During backtracking insn 3 is not recognized as + * stack access, so at the end of backtracking + * stack slot fp-8 is still marked in stack_mask. + * However the parent state may not have accessed + * fp-8 and it's "unallocated" stack space. + * In such case fallback to conservative. */ - verbose(env, "BUG spi %d stack_size %d\n", - i, func->allocated_stack); - WARN_ONCE(1, "verifier backtracking bug"); - return -EFAULT; + mark_all_scalars_precise(env, st); + return 0; } if (func->stack[i].slot_type[0] != STACK_SPILL) { From 08b0c891605acf727e43e3e03a25857d3e789b61 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 15 Aug 2019 11:30:50 +0300 Subject: [PATCH 083/169] drm/vmwgfx: Fix double free in vmw_recv_msg() We recently added a kfree() after the end of the loop: if (retries == RETRIES) { kfree(reply); return -EINVAL; } There are two problems. First the test is wrong and because retries equals RETRIES if we succeed on the last iteration through the loop. Second if we fail on the last iteration through the loop then the kfree is a double free. When you're reading this code, please note the break statement at the end of the while loop. This patch changes the loop so that if it's not successful then "reply" is NULL and we can test for that afterward. Cc: Fixes: 6b7c3b86f0b6 ("drm/vmwgfx: fix memory leak when too many retries have occurred") Signed-off-by: Dan Carpenter Reviewed-by: Thomas Hellstrom Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index 59e9d05ab928..0af048d1a815 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -353,7 +353,7 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg, !!(HIGH_WORD(ecx) & MESSAGE_STATUS_HB)); if ((HIGH_WORD(ebx) & MESSAGE_STATUS_SUCCESS) == 0) { kfree(reply); - + reply = NULL; if ((HIGH_WORD(ebx) & MESSAGE_STATUS_CPT) != 0) { /* A checkpoint occurred. Retry. */ continue; @@ -377,7 +377,7 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg, if ((HIGH_WORD(ecx) & MESSAGE_STATUS_SUCCESS) == 0) { kfree(reply); - + reply = NULL; if ((HIGH_WORD(ecx) & MESSAGE_STATUS_CPT) != 0) { /* A checkpoint occurred. Retry. */ continue; @@ -389,10 +389,8 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg, break; } - if (retries == RETRIES) { - kfree(reply); + if (!reply) return -EINVAL; - } *msg_len = reply_len; *msg = reply; From eddf3e9c7c7e4d0707c68d1bb22cc6ec8aef7d4a Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Wed, 4 Sep 2019 20:46:25 +0800 Subject: [PATCH 084/169] genirq: Prevent NULL pointer dereference in resend_irqs() The following crash was observed: Unable to handle kernel NULL pointer dereference at 0000000000000158 Internal error: Oops: 96000004 [#1] SMP pc : resend_irqs+0x68/0xb0 lr : resend_irqs+0x64/0xb0 ... Call trace: resend_irqs+0x68/0xb0 tasklet_action_common.isra.6+0x84/0x138 tasklet_action+0x2c/0x38 __do_softirq+0x120/0x324 run_ksoftirqd+0x44/0x60 smpboot_thread_fn+0x1ac/0x1e8 kthread+0x134/0x138 ret_from_fork+0x10/0x18 The reason for this is that the interrupt resend mechanism happens in soft interrupt context, which is a asynchronous mechanism versus other operations on interrupts. free_irq() does not take resend handling into account. Thus, the irq descriptor might be already freed before the resend tasklet is executed. resend_irqs() does not check the return value of the interrupt descriptor lookup and derefences the return value unconditionally. 1): __setup_irq irq_startup check_irq_resend // activate softirq to handle resend irq 2): irq_domain_free_irqs irq_free_descs free_desc call_rcu(&desc->rcu, delayed_free_desc) 3): __do_softirq tasklet_action resend_irqs desc = irq_to_desc(irq) desc->handle_irq(desc) // desc is NULL --> Ooops Fix this by adding a NULL pointer check in resend_irqs() before derefencing the irq descriptor. Fixes: a4633adcdbc1 ("[PATCH] genirq: add genirq sw IRQ-retrigger") Signed-off-by: Yunfeng Ye Signed-off-by: Thomas Gleixner Reviewed-by: Zhiqiang Liu Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1630ae13-5c8e-901e-de09-e740b6a426a7@huawei.com --- kernel/irq/resend.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 95414ad3506a..98c04ca5fa43 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -36,6 +36,8 @@ static void resend_irqs(unsigned long arg) irq = find_first_bit(irqs_resend, nr_irqs); clear_bit(irq, irqs_resend); desc = irq_to_desc(irq); + if (!desc) + continue; local_irq_disable(); desc->handle_irq(desc); local_irq_enable(); From d41a3effbb53b1bcea41e328d16a4d046a508381 Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Mon, 2 Sep 2019 13:37:29 +0100 Subject: [PATCH 085/169] keys: Fix missing null pointer check in request_key_auth_describe() If a request_key authentication token key gets revoked, there's a window in which request_key_auth_describe() can see it with a NULL payload - but it makes no check for this and something like the following oops may occur: BUG: Kernel NULL pointer dereference at 0x00000038 Faulting instruction address: 0xc0000000004ddf30 Oops: Kernel access of bad area, sig: 11 [#1] ... NIP [...] request_key_auth_describe+0x90/0xd0 LR [...] request_key_auth_describe+0x54/0xd0 Call Trace: [...] request_key_auth_describe+0x54/0xd0 (unreliable) [...] proc_keys_show+0x308/0x4c0 [...] seq_read+0x3d0/0x540 [...] proc_reg_read+0x90/0x110 [...] __vfs_read+0x3c/0x70 [...] vfs_read+0xb4/0x1b0 [...] ksys_read+0x7c/0x130 [...] system_call+0x5c/0x70 Fix this by checking for a NULL pointer when describing such a key. Also make the read routine check for a NULL pointer to be on the safe side. [DH: Modified to not take already-held rcu lock and modified to also check in the read routine] Fixes: 04c567d9313e ("[PATCH] Keys: Fix race between two instantiators of a key") Reported-by: Sachin Sant Signed-off-by: Hillf Danton Signed-off-by: David Howells Tested-by: Sachin Sant Signed-off-by: Linus Torvalds --- security/keys/request_key_auth.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index e73ec040e250..ecba39c93fd9 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -66,6 +66,9 @@ static void request_key_auth_describe(const struct key *key, { struct request_key_auth *rka = dereference_key_rcu(key); + if (!rka) + return; + seq_puts(m, "key:"); seq_puts(m, key->description); if (key_is_positive(key)) @@ -83,6 +86,9 @@ static long request_key_auth_read(const struct key *key, size_t datalen; long ret; + if (!rka) + return -EKEYREVOKED; + datalen = rka->callout_len; ret = datalen; From 310aa0a25b338b3100c94880c9a69bec8ce8c3ae Mon Sep 17 00:00:00 2001 From: Mark-PK Tsai Date: Fri, 6 Sep 2019 14:01:16 +0800 Subject: [PATCH 086/169] perf/hw_breakpoint: Fix arch_hw_breakpoint use-before-initialization If we disable the compiler's auto-initialization feature, if -fplugin-arg-structleak_plugin-byref or -ftrivial-auto-var-init=pattern are disabled, arch_hw_breakpoint may be used before initialization after: 9a4903dde2c86 ("perf/hw_breakpoint: Split attribute parse and commit") On our ARM platform, the struct step_ctrl in arch_hw_breakpoint, which used to be zero-initialized by kzalloc(), may be used in arch_install_hw_breakpoint() without initialization. Signed-off-by: Mark-PK Tsai Cc: Alexander Shishkin Cc: Alix Wu Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: YJ Chiang Link: https://lkml.kernel.org/r/20190906060115.9460-1-mark-pk.tsai@mediatek.com [ Minor edits. ] Signed-off-by: Ingo Molnar --- kernel/events/hw_breakpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index c5cd852fe86b..3cc8416ec844 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -413,7 +413,7 @@ static int hw_breakpoint_parse(struct perf_event *bp, int register_perf_hw_breakpoint(struct perf_event *bp) { - struct arch_hw_breakpoint hw; + struct arch_hw_breakpoint hw = { }; int err; err = reserve_bp_slot(bp); @@ -461,7 +461,7 @@ int modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr, bool check) { - struct arch_hw_breakpoint hw; + struct arch_hw_breakpoint hw = { }; int err; err = hw_breakpoint_parse(bp, attr, &hw); From e16c2983fba0fa6763e43ad10916be35e3d8dc05 Mon Sep 17 00:00:00 2001 From: Steve Wahl Date: Thu, 5 Sep 2019 15:23:46 -0500 Subject: [PATCH 087/169] x86/purgatory: Change compiler flags from -mcmodel=kernel to -mcmodel=large to fix kexec relocation errors The last change to this Makefile caused relocation errors when loading a kdump kernel. Restore -mcmodel=large (not -mcmodel=kernel), -ffreestanding, and -fno-zero-initialized-bsss, without reverting to the former practice of resetting KBUILD_CFLAGS. Purgatory.ro is a standalone binary that is not linked against the rest of the kernel. Its image is copied into an array that is linked to the kernel, and from there kexec relocates it wherever it desires. With the previous change to compiler flags, the error "kexec: Overflow in relocation type 11 value 0x11fffd000" was encountered when trying to load the crash kernel. This is from kexec code trying to relocate the purgatory.ro object. From the error message, relocation type 11 is R_X86_64_32S. The x86_64 ABI says: "The R_X86_64_32 and R_X86_64_32S relocations truncate the computed value to 32-bits. The linker must verify that the generated value for the R_X86_64_32 (R_X86_64_32S) relocation zero-extends (sign-extends) to the original 64-bit value." This type of relocation doesn't work when kexec chooses to place the purgatory binary in memory that is not reachable with 32 bit addresses. The compiler flag -mcmodel=kernel allows those type of relocations to be emitted, so revert to using -mcmodel=large as was done before. Also restore the -ffreestanding and -fno-zero-initialized-bss flags because they are appropriate for a stand alone piece of object code which doesn't explicitly zero the bss, and one other report has said undefined symbols are encountered without -ffreestanding. These identical compiler flag changes need to happen for every object that becomes part of the purgatory.ro object, so gather them together first into PURGATORY_CFLAGS_REMOVE and PURGATORY_CFLAGS, and then apply them to each of the objects that have C source. Do not apply any of these flags to kexec-purgatory.o, which is not part of the standalone object but part of the kernel proper. Tested-by: Vaibhav Rustagi Tested-by: Andreas Smas Signed-off-by: Steve Wahl Reviewed-by: Nick Desaulniers Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: None Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: clang-built-linux@googlegroups.com Cc: dimitri.sivanich@hpe.com Cc: mike.travis@hpe.com Cc: russ.anderson@hpe.com Fixes: b059f801a937 ("x86/purgatory: Use CFLAGS_REMOVE rather than reset KBUILD_CFLAGS") Link: https://lkml.kernel.org/r/20190905202346.GA26595@swahl-linux Signed-off-by: Ingo Molnar --- arch/x86/purgatory/Makefile | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 8901a1f89cf5..10fb42da0007 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -18,37 +18,40 @@ targets += purgatory.ro KASAN_SANITIZE := n KCOV_INSTRUMENT := n +# These are adjustments to the compiler flags used for objects that +# make up the standalone purgatory.ro + +PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel +PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss + # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That # in turn leaves some undefined symbols like __fentry__ in purgatory and not # sure how to relocate those. ifdef CONFIG_FUNCTION_TRACER -CFLAGS_REMOVE_sha256.o += $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_purgatory.o += $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_string.o += $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_kexec-purgatory.o += $(CC_FLAGS_FTRACE) +PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_FTRACE) endif ifdef CONFIG_STACKPROTECTOR -CFLAGS_REMOVE_sha256.o += -fstack-protector -CFLAGS_REMOVE_purgatory.o += -fstack-protector -CFLAGS_REMOVE_string.o += -fstack-protector -CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector +PURGATORY_CFLAGS_REMOVE += -fstack-protector endif ifdef CONFIG_STACKPROTECTOR_STRONG -CFLAGS_REMOVE_sha256.o += -fstack-protector-strong -CFLAGS_REMOVE_purgatory.o += -fstack-protector-strong -CFLAGS_REMOVE_string.o += -fstack-protector-strong -CFLAGS_REMOVE_kexec-purgatory.o += -fstack-protector-strong +PURGATORY_CFLAGS_REMOVE += -fstack-protector-strong endif ifdef CONFIG_RETPOLINE -CFLAGS_REMOVE_sha256.o += $(RETPOLINE_CFLAGS) -CFLAGS_REMOVE_purgatory.o += $(RETPOLINE_CFLAGS) -CFLAGS_REMOVE_string.o += $(RETPOLINE_CFLAGS) -CFLAGS_REMOVE_kexec-purgatory.o += $(RETPOLINE_CFLAGS) +PURGATORY_CFLAGS_REMOVE += $(RETPOLINE_CFLAGS) endif +CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE) +CFLAGS_purgatory.o += $(PURGATORY_CFLAGS) + +CFLAGS_REMOVE_sha256.o += $(PURGATORY_CFLAGS_REMOVE) +CFLAGS_sha256.o += $(PURGATORY_CFLAGS) + +CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE) +CFLAGS_string.o += $(PURGATORY_CFLAGS) + $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) From 36b7200f67dfe75b416b5281ed4ace9927b513bc Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Thu, 5 Sep 2019 12:09:48 -0500 Subject: [PATCH 088/169] iommu/amd: Flush old domains in kdump kernel When devices are attached to the amd_iommu in a kdump kernel, the old device table entries (DTEs), which were copied from the crashed kernel, will be overwritten with a new domain number. When the new DTE is written, the IOMMU is told to flush the DTE from its internal cache--but it is not told to flush the translation cache entries for the old domain number. Without this patch, AMD systems using the tg3 network driver fail when kdump tries to save the vmcore to a network system, showing network timeouts and (sometimes) IOMMU errors in the kernel log. This patch will flush IOMMU translation cache entries for the old domain when a DTE gets overwritten with a new domain number. Signed-off-by: Stuart Hayes Fixes: 3ac3e5ee5ed5 ('iommu/amd: Copy old trans table from old kernel') Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b607a92791d3..f853b96ee547 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1143,6 +1143,17 @@ static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu) iommu_completion_wait(iommu); } +static void amd_iommu_flush_tlb_domid(struct amd_iommu *iommu, u32 dom_id) +{ + struct iommu_cmd cmd; + + build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, + dom_id, 1); + iommu_queue_command(iommu, &cmd); + + iommu_completion_wait(iommu); +} + static void amd_iommu_flush_all(struct amd_iommu *iommu) { struct iommu_cmd cmd; @@ -1873,6 +1884,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, { u64 pte_root = 0; u64 flags = 0; + u32 old_domid; if (domain->mode != PAGE_MODE_NONE) pte_root = iommu_virt_to_phys(domain->pt_root); @@ -1922,8 +1934,20 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, flags &= ~DEV_DOMID_MASK; flags |= domain->id; + old_domid = amd_iommu_dev_table[devid].data[1] & DEV_DOMID_MASK; amd_iommu_dev_table[devid].data[1] = flags; amd_iommu_dev_table[devid].data[0] = pte_root; + + /* + * A kdump kernel might be replacing a domain ID that was copied from + * the previous kernel--if so, it needs to flush the translation cache + * entries for the old domain ID that is being overwritten + */ + if (old_domid) { + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + amd_iommu_flush_tlb_domid(iommu, old_domid); + } } static void clear_dte_entry(u16 devid) From 754265bcab78a9014f0f99cd35e0d610fcd7dfa7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 6 Sep 2019 10:39:54 +0200 Subject: [PATCH 089/169] iommu/amd: Fix race in increase_address_space() After the conversion to lock-less dma-api call the increase_address_space() function can be called without any locking. Multiple CPUs could potentially race for increasing the address space, leading to invalid domain->mode settings and invalid page-tables. This has been happening in the wild under high IO load and memory pressure. Fix the race by locking this operation. The function is called infrequently so that this does not introduce a performance regression in the dma-api path again. Reported-by: Qian Cai Fixes: 256e4621c21a ('iommu/amd: Make use of the generic IOVA allocator') Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index f853b96ee547..61de81965c44 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1435,18 +1435,21 @@ static void free_pagetable(struct protection_domain *domain) * another level increases the size of the address space by 9 bits to a size up * to 64 bits. */ -static bool increase_address_space(struct protection_domain *domain, +static void increase_address_space(struct protection_domain *domain, gfp_t gfp) { + unsigned long flags; u64 *pte; - if (domain->mode == PAGE_MODE_6_LEVEL) + spin_lock_irqsave(&domain->lock, flags); + + if (WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) /* address space already 64 bit large */ - return false; + goto out; pte = (void *)get_zeroed_page(gfp); if (!pte) - return false; + goto out; *pte = PM_LEVEL_PDE(domain->mode, iommu_virt_to_phys(domain->pt_root)); @@ -1454,7 +1457,10 @@ static bool increase_address_space(struct protection_domain *domain, domain->mode += 1; domain->updated = true; - return true; +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return; } static u64 *alloc_pte(struct protection_domain *domain, From 8928e917aeafaf38d65cc5cbc1f11e952dbed062 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 5 Sep 2019 09:25:55 +0100 Subject: [PATCH 090/169] soc: qcom: geni: Provide parameter error checking When booting with ACPI, the Geni Serial Engine is not set as the I2C/SPI parent and thus, the wrapper (parent device) is unassigned. This causes the kernel to crash with a null dereference error. Link: https://lore.kernel.org/r/20190905082555.15020-1-lee.jones@linaro.org Fixes: 8bc529b25354 ("soc: qcom: geni: Add support for ACPI") Acked-by: Bjorn Andersson Reviewed-by: Stephen Boyd Signed-off-by: Lee Jones Signed-off-by: Arnd Bergmann --- drivers/soc/qcom/qcom-geni-se.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/soc/qcom/qcom-geni-se.c b/drivers/soc/qcom/qcom-geni-se.c index d5cf953b4337..7d622ea1274e 100644 --- a/drivers/soc/qcom/qcom-geni-se.c +++ b/drivers/soc/qcom/qcom-geni-se.c @@ -630,6 +630,9 @@ int geni_se_tx_dma_prep(struct geni_se *se, void *buf, size_t len, struct geni_wrapper *wrapper = se->wrapper; u32 val; + if (!wrapper) + return -EINVAL; + *iova = dma_map_single(wrapper->dev, buf, len, DMA_TO_DEVICE); if (dma_mapping_error(wrapper->dev, *iova)) return -EIO; @@ -663,6 +666,9 @@ int geni_se_rx_dma_prep(struct geni_se *se, void *buf, size_t len, struct geni_wrapper *wrapper = se->wrapper; u32 val; + if (!wrapper) + return -EINVAL; + *iova = dma_map_single(wrapper->dev, buf, len, DMA_FROM_DEVICE); if (dma_mapping_error(wrapper->dev, *iova)) return -EIO; From 1f493162b59b09224de4d9c94e4392807e0edcce Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 6 Sep 2019 05:58:52 -0400 Subject: [PATCH 091/169] Documentation/process/embargoed-hardware-issues: Microsoft ambassador Add Sasha Levin as Microsoft's process ambassador. Signed-off-by: Sasha Levin Signed-off-by: Sasha Levin Link: https://lore.kernel.org/r/20190906095852.23568-1-sashal@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 47316d9c24f5..c101748bfd92 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -219,7 +219,7 @@ an involved disclosed party. The current ambassadors list: Intel Qualcomm - Microsoft + Microsoft Sasha Levin VMware Xen Andrew Cooper From 6e1cdedcf0362fed3aedfe051d46bd7ee2a85fe1 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Thu, 5 Sep 2019 09:57:12 +0800 Subject: [PATCH 092/169] net: sonic: return NETDEV_TX_OK if failed to map buffer NETDEV_TX_BUSY really should only be used by drivers that call netif_tx_stop_queue() at the wrong moment. If dma_map_single() is failed to map tx DMA buffer, it might trigger an infinite loop. This patch use NETDEV_TX_OK instead of NETDEV_TX_BUSY, and change printk to pr_err_ratelimited. Fixes: d9fb9f384292 ("*sonic/natsemi/ns83829: Move the National Semi-conductor drivers") Signed-off-by: Mao Wenan Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index d0a01e8f000a..18fd62fbfb64 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -232,9 +232,9 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) laddr = dma_map_single(lp->device, skb->data, length, DMA_TO_DEVICE); if (!laddr) { - printk(KERN_ERR "%s: failed to map tx DMA buffer.\n", dev->name); + pr_err_ratelimited("%s: failed to map tx DMA buffer.\n", dev->name); dev_kfree_skb(skb); - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; } sonic_tda_put(dev, entry, SONIC_TD_STATUS, 0); /* clear status */ From f4b633b911fd3b4cbe1dc065e8fb064078d0889d Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 5 Sep 2019 05:15:42 -0400 Subject: [PATCH 093/169] forcedeth: use per cpu to collect xmit/recv statistics When testing with a background iperf pushing 1Gbit/sec traffic and running both ifconfig and netstat to collect statistics, some deadlocks occurred. Ifconfig and netstat will call nv_get_stats64 to get software xmit/recv statistics. In the commit f5d827aece36 ("forcedeth: implement ndo_get_stats64() API"), the normal tx/rx variables is to collect tx/rx statistics. The fix is to replace normal tx/rx variables with per cpu 64-bit variable to collect xmit/recv statistics. The per cpu variable will avoid deadlocks and provide fast efficient statistics updates. In nv_probe, the per cpu variable is initialized. In nv_remove, this per cpu variable is freed. In xmit/recv process, this per cpu variable will be updated. In nv_get_stats64, this per cpu variable on each cpu is added up. Then the driver can get xmit/recv packets statistics. A test runs for several days with this commit, the deadlocks disappear and the performance is better. Tested: - iperf SMP x86_64 -> Client connecting to 1.1.1.108, TCP port 5001 TCP window size: 85.0 KByte (default) ------------------------------------------------------------ [ 3] local 1.1.1.105 port 38888 connected with 1.1.1.108 port 5001 [ ID] Interval Transfer Bandwidth [ 3] 0.0-10.0 sec 1.10 GBytes 943 Mbits/sec ifconfig results: enp0s9 Link encap:Ethernet HWaddr 00:21:28:6f:de:0f inet addr:1.1.1.105 Bcast:0.0.0.0 Mask:255.255.255.0 UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:5774764531 errors:0 dropped:0 overruns:0 frame:0 TX packets:633534193 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:7646159340904 (7.6 TB) TX bytes:11425340407722 (11.4 TB) netstat results: Kernel Interface table Iface MTU Met RX-OK RX-ERR RX-DRP RX-OVR TX-OK TX-ERR TX-DRP TX-OVR Flg ... enp0s9 1500 0 5774764531 0 0 0 633534193 0 0 0 BMRU ... Fixes: f5d827aece36 ("forcedeth: implement ndo_get_stats64() API") CC: Joe Jin CC: JUNXIAO_BI Reported-and-tested-by: Nan san Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 143 ++++++++++++++++-------- 1 file changed, 99 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index b327b29f5d57..a6b4bfae4684 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -713,6 +713,21 @@ struct nv_skb_map { struct nv_skb_map *next_tx_ctx; }; +struct nv_txrx_stats { + u64 stat_rx_packets; + u64 stat_rx_bytes; /* not always available in HW */ + u64 stat_rx_missed_errors; + u64 stat_rx_dropped; + u64 stat_tx_packets; /* not always available in HW */ + u64 stat_tx_bytes; + u64 stat_tx_dropped; +}; + +#define nv_txrx_stats_inc(member) \ + __this_cpu_inc(np->txrx_stats->member) +#define nv_txrx_stats_add(member, count) \ + __this_cpu_add(np->txrx_stats->member, (count)) + /* * SMP locking: * All hardware access under netdev_priv(dev)->lock, except the performance @@ -797,10 +812,7 @@ struct fe_priv { /* RX software stats */ struct u64_stats_sync swstats_rx_syncp; - u64 stat_rx_packets; - u64 stat_rx_bytes; /* not always available in HW */ - u64 stat_rx_missed_errors; - u64 stat_rx_dropped; + struct nv_txrx_stats __percpu *txrx_stats; /* media detection workaround. * Locking: Within irq hander or disable_irq+spin_lock(&np->lock); @@ -826,9 +838,6 @@ struct fe_priv { /* TX software stats */ struct u64_stats_sync swstats_tx_syncp; - u64 stat_tx_packets; /* not always available in HW */ - u64 stat_tx_bytes; - u64 stat_tx_dropped; /* msi/msi-x fields */ u32 msi_flags; @@ -1721,6 +1730,39 @@ static void nv_update_stats(struct net_device *dev) } } +static void nv_get_stats(int cpu, struct fe_priv *np, + struct rtnl_link_stats64 *storage) +{ + struct nv_txrx_stats *src = per_cpu_ptr(np->txrx_stats, cpu); + unsigned int syncp_start; + u64 rx_packets, rx_bytes, rx_dropped, rx_missed_errors; + u64 tx_packets, tx_bytes, tx_dropped; + + do { + syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp); + rx_packets = src->stat_rx_packets; + rx_bytes = src->stat_rx_bytes; + rx_dropped = src->stat_rx_dropped; + rx_missed_errors = src->stat_rx_missed_errors; + } while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start)); + + storage->rx_packets += rx_packets; + storage->rx_bytes += rx_bytes; + storage->rx_dropped += rx_dropped; + storage->rx_missed_errors += rx_missed_errors; + + do { + syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp); + tx_packets = src->stat_tx_packets; + tx_bytes = src->stat_tx_bytes; + tx_dropped = src->stat_tx_dropped; + } while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start)); + + storage->tx_packets += tx_packets; + storage->tx_bytes += tx_bytes; + storage->tx_dropped += tx_dropped; +} + /* * nv_get_stats64: dev->ndo_get_stats64 function * Get latest stats value from the nic. @@ -1733,7 +1775,7 @@ nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage) __releases(&netdev_priv(dev)->hwstats_lock) { struct fe_priv *np = netdev_priv(dev); - unsigned int syncp_start; + int cpu; /* * Note: because HW stats are not always available and for @@ -1746,20 +1788,8 @@ nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage) */ /* software stats */ - do { - syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp); - storage->rx_packets = np->stat_rx_packets; - storage->rx_bytes = np->stat_rx_bytes; - storage->rx_dropped = np->stat_rx_dropped; - storage->rx_missed_errors = np->stat_rx_missed_errors; - } while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start)); - - do { - syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp); - storage->tx_packets = np->stat_tx_packets; - storage->tx_bytes = np->stat_tx_bytes; - storage->tx_dropped = np->stat_tx_dropped; - } while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start)); + for_each_online_cpu(cpu) + nv_get_stats(cpu, np, storage); /* If the nic supports hw counters then retrieve latest values */ if (np->driver_data & DEV_HAS_STATISTICS_V123) { @@ -1827,7 +1857,7 @@ static int nv_alloc_rx(struct net_device *dev) } else { packet_dropped: u64_stats_update_begin(&np->swstats_rx_syncp); - np->stat_rx_dropped++; + nv_txrx_stats_inc(stat_rx_dropped); u64_stats_update_end(&np->swstats_rx_syncp); return 1; } @@ -1869,7 +1899,7 @@ static int nv_alloc_rx_optimized(struct net_device *dev) } else { packet_dropped: u64_stats_update_begin(&np->swstats_rx_syncp); - np->stat_rx_dropped++; + nv_txrx_stats_inc(stat_rx_dropped); u64_stats_update_end(&np->swstats_rx_syncp); return 1; } @@ -2013,7 +2043,7 @@ static void nv_drain_tx(struct net_device *dev) } if (nv_release_txskb(np, &np->tx_skb[i])) { u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_dropped++; + nv_txrx_stats_inc(stat_tx_dropped); u64_stats_update_end(&np->swstats_tx_syncp); } np->tx_skb[i].dma = 0; @@ -2227,7 +2257,7 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_dropped++; + nv_txrx_stats_inc(stat_tx_dropped); u64_stats_update_end(&np->swstats_tx_syncp); return NETDEV_TX_OK; } @@ -2273,7 +2303,7 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb_any(skb); np->put_tx_ctx = start_tx_ctx; u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_dropped++; + nv_txrx_stats_inc(stat_tx_dropped); u64_stats_update_end(&np->swstats_tx_syncp); return NETDEV_TX_OK; } @@ -2384,7 +2414,7 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_dropped++; + nv_txrx_stats_inc(stat_tx_dropped); u64_stats_update_end(&np->swstats_tx_syncp); return NETDEV_TX_OK; } @@ -2431,7 +2461,7 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, dev_kfree_skb_any(skb); np->put_tx_ctx = start_tx_ctx; u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_dropped++; + nv_txrx_stats_inc(stat_tx_dropped); u64_stats_update_end(&np->swstats_tx_syncp); return NETDEV_TX_OK; } @@ -2560,9 +2590,12 @@ static int nv_tx_done(struct net_device *dev, int limit) && !(flags & NV_TX_RETRYCOUNT_MASK)) nv_legacybackoff_reseed(dev); } else { + unsigned int len; + u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_packets++; - np->stat_tx_bytes += np->get_tx_ctx->skb->len; + nv_txrx_stats_inc(stat_tx_packets); + len = np->get_tx_ctx->skb->len; + nv_txrx_stats_add(stat_tx_bytes, len); u64_stats_update_end(&np->swstats_tx_syncp); } bytes_compl += np->get_tx_ctx->skb->len; @@ -2577,9 +2610,12 @@ static int nv_tx_done(struct net_device *dev, int limit) && !(flags & NV_TX2_RETRYCOUNT_MASK)) nv_legacybackoff_reseed(dev); } else { + unsigned int len; + u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_packets++; - np->stat_tx_bytes += np->get_tx_ctx->skb->len; + nv_txrx_stats_inc(stat_tx_packets); + len = np->get_tx_ctx->skb->len; + nv_txrx_stats_add(stat_tx_bytes, len); u64_stats_update_end(&np->swstats_tx_syncp); } bytes_compl += np->get_tx_ctx->skb->len; @@ -2627,9 +2663,12 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit) nv_legacybackoff_reseed(dev); } } else { + unsigned int len; + u64_stats_update_begin(&np->swstats_tx_syncp); - np->stat_tx_packets++; - np->stat_tx_bytes += np->get_tx_ctx->skb->len; + nv_txrx_stats_inc(stat_tx_packets); + len = np->get_tx_ctx->skb->len; + nv_txrx_stats_add(stat_tx_bytes, len); u64_stats_update_end(&np->swstats_tx_syncp); } @@ -2806,6 +2845,15 @@ static int nv_getlen(struct net_device *dev, void *packet, int datalen) } } +static void rx_missing_handler(u32 flags, struct fe_priv *np) +{ + if (flags & NV_RX_MISSEDFRAME) { + u64_stats_update_begin(&np->swstats_rx_syncp); + nv_txrx_stats_inc(stat_rx_missed_errors); + u64_stats_update_end(&np->swstats_rx_syncp); + } +} + static int nv_rx_process(struct net_device *dev, int limit) { struct fe_priv *np = netdev_priv(dev); @@ -2848,11 +2896,7 @@ static int nv_rx_process(struct net_device *dev, int limit) } /* the rest are hard errors */ else { - if (flags & NV_RX_MISSEDFRAME) { - u64_stats_update_begin(&np->swstats_rx_syncp); - np->stat_rx_missed_errors++; - u64_stats_update_end(&np->swstats_rx_syncp); - } + rx_missing_handler(flags, np); dev_kfree_skb(skb); goto next_pkt; } @@ -2896,8 +2940,8 @@ static int nv_rx_process(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); napi_gro_receive(&np->napi, skb); u64_stats_update_begin(&np->swstats_rx_syncp); - np->stat_rx_packets++; - np->stat_rx_bytes += len; + nv_txrx_stats_inc(stat_rx_packets); + nv_txrx_stats_add(stat_rx_bytes, len); u64_stats_update_end(&np->swstats_rx_syncp); next_pkt: if (unlikely(np->get_rx.orig++ == np->last_rx.orig)) @@ -2982,8 +3026,8 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) } napi_gro_receive(&np->napi, skb); u64_stats_update_begin(&np->swstats_rx_syncp); - np->stat_rx_packets++; - np->stat_rx_bytes += len; + nv_txrx_stats_inc(stat_rx_packets); + nv_txrx_stats_add(stat_rx_bytes, len); u64_stats_update_end(&np->swstats_rx_syncp); } else { dev_kfree_skb(skb); @@ -5651,6 +5695,12 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) SET_NETDEV_DEV(dev, &pci_dev->dev); u64_stats_init(&np->swstats_rx_syncp); u64_stats_init(&np->swstats_tx_syncp); + np->txrx_stats = alloc_percpu(struct nv_txrx_stats); + if (!np->txrx_stats) { + pr_err("np->txrx_stats, alloc memory error.\n"); + err = -ENOMEM; + goto out_alloc_percpu; + } timer_setup(&np->oom_kick, nv_do_rx_refill, 0); timer_setup(&np->nic_poll, nv_do_nic_poll, 0); @@ -6060,6 +6110,8 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) out_disable: pci_disable_device(pci_dev); out_free: + free_percpu(np->txrx_stats); +out_alloc_percpu: free_netdev(dev); out: return err; @@ -6105,6 +6157,9 @@ static void nv_restore_mac_addr(struct pci_dev *pci_dev) static void nv_remove(struct pci_dev *pci_dev) { struct net_device *dev = pci_get_drvdata(pci_dev); + struct fe_priv *np = netdev_priv(dev); + + free_percpu(np->txrx_stats); unregister_netdev(dev); From b88dd52c62bb5c5d58f0963287f41fd084352c57 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Sep 2019 05:20:22 -0700 Subject: [PATCH 094/169] net: sched: fix reordering issues Whenever MQ is not used on a multiqueue device, we experience serious reordering problems. Bisection found the cited commit. The issue can be described this way : - A single qdisc hierarchy is shared by all transmit queues. (eg : tc qdisc replace dev eth0 root fq_codel) - When/if try_bulk_dequeue_skb_slow() dequeues a packet targetting a different transmit queue than the one used to build a packet train, we stop building the current list and save the 'bad' skb (P1) in a special queue. (bad_txq) - When dequeue_skb() calls qdisc_dequeue_skb_bad_txq() and finds this skb (P1), it checks if the associated transmit queues is still in frozen state. If the queue is still blocked (by BQL or NIC tx ring full), we leave the skb in bad_txq and return NULL. - dequeue_skb() calls q->dequeue() to get another packet (P2) The other packet can target the problematic queue (that we found in frozen state for the bad_txq packet), but another cpu just ran TX completion and made room in the txq that is now ready to accept new packets. - Packet P2 is sent while P1 is still held in bad_txq, P1 might be sent at next round. In practice P2 is the lead of a big packet train (P2,P3,P4 ...) filling the BQL budget and delaying P1 by many packets :/ To solve this problem, we have to block the dequeue process as long as the first packet in bad_txq can not be sent. Reordering issues disappear and no side effects have been seen. Fixes: a53851e2c321 ("net: sched: explicit locking in gso_cpu fallback") Signed-off-by: Eric Dumazet Cc: John Fastabend Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 137db1cbde85..ac28f6a5d70e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -46,6 +46,8 @@ EXPORT_SYMBOL(default_qdisc_ops); * - updates to tree and tree walking are only done under the rtnl mutex. */ +#define SKB_XOFF_MAGIC ((struct sk_buff *)1UL) + static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q) { const struct netdev_queue *txq = q->dev_queue; @@ -71,7 +73,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q) q->q.qlen--; } } else { - skb = NULL; + skb = SKB_XOFF_MAGIC; } } @@ -253,8 +255,11 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, return skb; skb = qdisc_dequeue_skb_bad_txq(q); - if (unlikely(skb)) + if (unlikely(skb)) { + if (skb == SKB_XOFF_MAGIC) + return NULL; goto bulk; + } skb = q->dequeue(q); if (skb) { bulk: From b0a3caeafded18111b638cf1adeba10bfd58a12d Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Thu, 5 Sep 2019 18:26:08 +0530 Subject: [PATCH 095/169] MAINTAINERS: add myself as maintainer for xilinx axiethernet driver I am maintaining xilinx axiethernet driver in xilinx tree and would like to maintain it in the mainline kernel as well. Hence adding myself as a maintainer. Also Anirudha and John has moved to new roles, so based on request removing them from the maintainer list. Signed-off-by: Radhey Shyam Pandey Acked-by: John Linn Acked-by: Michal Simek Signed-off-by: David S. Miller --- MAINTAINERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e7a47b5210fd..a50e97a63bc8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17699,8 +17699,7 @@ F: include/uapi/linux/dqblk_xfs.h F: include/uapi/linux/fsmap.h XILINX AXI ETHERNET DRIVER -M: Anirudha Sarangi -M: John Linn +M: Radhey Shyam Pandey S: Maintained F: drivers/net/ethernet/xilinx/xilinx_axienet* From a3200debde5fda3e25f7e22cb2bc17a9280f7811 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 6 Sep 2019 10:46:14 +0300 Subject: [PATCH 096/169] drm/modes: Make the whitelist more const The commit 3764137906a5 ("drm/modes: Introduce a whitelist for the named modes") introduced a whitelist in the named modes lookup code in order to be a bit more robust. However, even though the char pointers were made const, the array itself was not. Let's fix that. Fixes: 3764137906a5 ("drm/modes: Introduce a whitelist for the named modes") Suggested-by: Jani Nikula Reviewed-by: Jani Nikula Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20190906074614.30608-1-mripard@kernel.org --- drivers/gpu/drm/drm_modes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 0d23bf729e9f..c814bcef18a4 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1677,7 +1677,7 @@ static int drm_mode_parse_cmdline_options(char *str, size_t len, return 0; } -static const char *drm_named_modes_whitelist[] = { +static const char * const drm_named_modes_whitelist[] = { "NTSC", "PAL", }; From 78e05972c5e6c8e9ca4c00ccc6985409da69f904 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 Sep 2019 16:20:09 +0200 Subject: [PATCH 097/169] ipc: fix semtimedop for generic 32-bit architectures As Vincent noticed, the y2038 conversion of semtimedop in linux-5.1 broke when commit 00bf25d693e7 ("y2038: use time32 syscall names on 32-bit") changed all system calls on all architectures that take a 32-bit time_t to point to the _time32 implementation, but left out semtimedop in the asm-generic header. This affects all 32-bit architectures using asm-generic/unistd.h: h8300, unicore32, openrisc, nios2, hexagon, c6x, arc, nds32 and csky. The notable exception is riscv32, which has dropped support for the time32 system calls entirely. Reported-by: Vincent Chen Cc: stable@vger.kernel.org Cc: Vincent Chen Cc: Greentime Hu Cc: Yoshinori Sato Cc: Guan Xuetao Cc: Stafford Horne Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Ley Foon Tan Cc: Richard Kuo Cc: Mark Salter Cc: Aurelien Jacquiot Cc: Guo Ren Fixes: 00bf25d693e7 ("y2038: use time32 syscall names on 32-bit") Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/unistd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 1be0e798e362..1fc8faa6e973 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -569,7 +569,7 @@ __SYSCALL(__NR_semget, sys_semget) __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl) #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_semtimedop 192 -__SC_COMP(__NR_semtimedop, sys_semtimedop, sys_semtimedop_time32) +__SC_3264(__NR_semtimedop, sys_semtimedop_time32, sys_semtimedop) #endif #define __NR_semop 193 __SYSCALL(__NR_semop, sys_semop) From 0c043d70d04711fe6c380df9065fdc44192c49bf Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 6 Sep 2019 12:02:32 -0700 Subject: [PATCH 098/169] Input: elan_i2c - remove Lenovo Legion Y7000 PnpID Looks like the Bios of the Lenovo Legion Y7000 is using ELAN061B when the actual device is supposed to be used with hid-multitouch. Remove it from the list of the supported device, hoping that no one will complain about the loss in functionality. Link: https://bugzilla.kernel.org/show_bug.cgi?id=203467 Fixes: 738c06d0e456 ("Input: elan_i2c - add hardware ID for multiple Lenovo laptops") Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- include/linux/input/elan-i2c-ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/input/elan-i2c-ids.h b/include/linux/input/elan-i2c-ids.h index ceabb01a6a7d..1ecb6b45812c 100644 --- a/include/linux/input/elan-i2c-ids.h +++ b/include/linux/input/elan-i2c-ids.h @@ -48,7 +48,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0618", 0 }, { "ELAN0619", 0 }, { "ELAN061A", 0 }, - { "ELAN061B", 0 }, +/* { "ELAN061B", 0 }, not working on the Lenovo Legion Y7000 */ { "ELAN061C", 0 }, { "ELAN061D", 0 }, { "ELAN061E", 0 }, From b82573fdbef809803d06295f318f7ad1a2c5ceb9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 26 Aug 2019 21:02:09 +0200 Subject: [PATCH 099/169] net/hamradio/6pack: Fix the size of a sk_buff used in 'sp_bump()' We 'allocate' 'count' bytes here. In fact, 'dev_alloc_skb' already add some extra space for padding, so a bit more is allocated. However, we use 1 byte for the KISS command, then copy 'count' bytes, so count+1 bytes. Explicitly allocate and use 1 more byte to be safe. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/hamradio/6pack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 331c16d30d5d..23281aeeb222 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -344,10 +344,10 @@ static void sp_bump(struct sixpack *sp, char cmd) sp->dev->stats.rx_bytes += count; - if ((skb = dev_alloc_skb(count)) == NULL) + if ((skb = dev_alloc_skb(count + 1)) == NULL) goto out_mem; - ptr = skb_put(skb, count); + ptr = skb_put(skb, count + 1); *ptr++ = cmd; /* KISS command */ memcpy(ptr, sp->cooked_buf + 1, count); From 63b2ed4e10b2e6c913e1d8cdd728e7fba4115a3d Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Thu, 5 Sep 2019 19:46:18 +0300 Subject: [PATCH 100/169] net: phylink: Fix flow control resolution Regarding to IEEE 802.3-2015 standard section 2 28B.3 Priority resolution - Table 28-3 - Pause resolution In case of Local device Pause=1 AsymDir=0, Link partner Pause=1 AsymDir=1, Local device resolution should be enable PAUSE transmit, disable PAUSE receive. And in case of Local device Pause=1 AsymDir=1, Link partner Pause=1 AsymDir=0, Local device resolution should be enable PAUSE receive, disable PAUSE transmit. Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Stefan Chulski Reported-by: Shaul Ben-Mayor Acked-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index a45c5de96ab1..a5a57ca94c1a 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -376,8 +376,8 @@ static void phylink_get_fixed_state(struct phylink *pl, struct phylink_link_stat * Local device Link partner * Pause AsymDir Pause AsymDir Result * 1 X 1 X TX+RX - * 0 1 1 1 RX - * 1 1 0 1 TX + * 0 1 1 1 TX + * 1 1 0 1 RX */ static void phylink_resolve_flow(struct phylink *pl, struct phylink_link_state *state) @@ -398,7 +398,7 @@ static void phylink_resolve_flow(struct phylink *pl, new_pause = MLO_PAUSE_TX | MLO_PAUSE_RX; else if (pause & MLO_PAUSE_ASYM) new_pause = state->pause & MLO_PAUSE_SYM ? - MLO_PAUSE_RX : MLO_PAUSE_TX; + MLO_PAUSE_TX : MLO_PAUSE_RX; } else { new_pause = pl->link_config.pause & MLO_PAUSE_TXRX_MASK; } From 1c2977c094998de032fee6e898c88b4a05483d08 Mon Sep 17 00:00:00 2001 From: Juliet Kim Date: Thu, 5 Sep 2019 17:30:01 -0400 Subject: [PATCH 101/169] net/ibmvnic: free reset work of removed device from queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 36f1031c51a2 ("ibmvnic: Do not process reset during or after device removal") made the change to exit reset if the driver has been removed, but does not free reset work items of the adapter from queue. Ensure all reset work items are freed when breaking out of the loop early. Fixes: 36f1031c51a2 ("ibmnvic: Do not process reset during or after device removal”) Signed-off-by: Juliet Kim Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index fa4bb940665c..6644cabc8e75 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1985,7 +1985,10 @@ static void __ibmvnic_reset(struct work_struct *work) while (rwi) { if (adapter->state == VNIC_REMOVING || adapter->state == VNIC_REMOVED) - goto out; + kfree(rwi); + rc = EBUSY; + break; + } if (adapter->force_reset_recovery) { adapter->force_reset_recovery = false; @@ -2011,7 +2014,7 @@ static void __ibmvnic_reset(struct work_struct *work) netdev_dbg(adapter->netdev, "Reset failed\n"); free_all_rwi(adapter); } -out: + adapter->resetting = false; if (we_lock_rtnl) rtnl_unlock(); From fe163e534e5eecdfd7b5920b0dfd24c458ee85d6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 5 Sep 2019 19:36:37 -0700 Subject: [PATCH 102/169] isdn/capi: check message length in capi_write() syzbot reported: BUG: KMSAN: uninit-value in capi_write+0x791/0xa90 drivers/isdn/capi/capi.c:700 CPU: 0 PID: 10025 Comm: syz-executor379 Not tainted 4.20.0-rc7+ #2 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:613 __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313 capi_write+0x791/0xa90 drivers/isdn/capi/capi.c:700 do_loop_readv_writev fs/read_write.c:703 [inline] do_iter_write+0x83e/0xd80 fs/read_write.c:961 vfs_writev fs/read_write.c:1004 [inline] do_writev+0x397/0x840 fs/read_write.c:1039 __do_sys_writev fs/read_write.c:1112 [inline] __se_sys_writev+0x9b/0xb0 fs/read_write.c:1109 __x64_sys_writev+0x4a/0x70 fs/read_write.c:1109 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 [...] The problem is that capi_write() is reading past the end of the message. Fix it by checking the message's length in the needed places. Reported-and-tested-by: syzbot+0849c524d9c634f5ae66@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Signed-off-by: David S. Miller --- drivers/isdn/capi/capi.c | 10 +++++++++- include/uapi/linux/isdn/capicmd.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index 3c3ad42f22bf..c92b405b7646 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -688,6 +688,9 @@ capi_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos if (!cdev->ap.applid) return -ENODEV; + if (count < CAPIMSG_BASELEN) + return -EINVAL; + skb = alloc_skb(count, GFP_USER); if (!skb) return -ENOMEM; @@ -698,7 +701,8 @@ capi_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos } mlen = CAPIMSG_LEN(skb->data); if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_REQ) { - if ((size_t)(mlen + CAPIMSG_DATALEN(skb->data)) != count) { + if (count < CAPI_DATA_B3_REQ_LEN || + (size_t)(mlen + CAPIMSG_DATALEN(skb->data)) != count) { kfree_skb(skb); return -EINVAL; } @@ -711,6 +715,10 @@ capi_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos CAPIMSG_SETAPPID(skb->data, cdev->ap.applid); if (CAPIMSG_CMD(skb->data) == CAPI_DISCONNECT_B3_RESP) { + if (count < CAPI_DISCONNECT_B3_RESP_LEN) { + kfree_skb(skb); + return -EINVAL; + } mutex_lock(&cdev->lock); capincci_free(cdev, CAPIMSG_NCCI(skb->data)); mutex_unlock(&cdev->lock); diff --git a/include/uapi/linux/isdn/capicmd.h b/include/uapi/linux/isdn/capicmd.h index 4941628a4fb9..5ec88e7548a9 100644 --- a/include/uapi/linux/isdn/capicmd.h +++ b/include/uapi/linux/isdn/capicmd.h @@ -16,6 +16,7 @@ #define CAPI_MSG_BASELEN 8 #define CAPI_DATA_B3_REQ_LEN (CAPI_MSG_BASELEN+4+4+2+2+2) #define CAPI_DATA_B3_RESP_LEN (CAPI_MSG_BASELEN+4+2) +#define CAPI_DISCONNECT_B3_RESP_LEN (CAPI_MSG_BASELEN+4) /*----- CAPI commands -----*/ #define CAPI_ALERT 0x01 From 8652f17c658d03f5c87b8dee6e8e52480c6cd37d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Thu, 5 Sep 2019 20:56:37 -0700 Subject: [PATCH 103/169] ipv6: addrconf_f6i_alloc - fix non-null pointer check to !IS_ERR() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a stupid bug I recently introduced... ip6_route_info_create() returns an ERR_PTR(err) and not a NULL on error. Fixes: d55a2e374a94 ("net-ipv6: fix excessive RTF_ADDRCONF flag on ::1/128 local route (and others)'") Cc: David Ahern Cc: Lorenzo Colitti Cc: Eric Dumazet Signed-off-by: Maciej Żenczykowski Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 85bcd97f7a72..546088e50815 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4406,7 +4406,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, } f6i = ip6_route_info_create(&cfg, gfp_flags, NULL); - if (f6i) + if (!IS_ERR(f6i)) f6i->dst_nocount = true; return f6i; } From 3dcbdb134f329842a38f0e6797191b885ab00a00 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Fri, 6 Sep 2019 12:23:50 +0300 Subject: [PATCH 104/169] net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list Historically, support for frag_list packets entering skb_segment() was limited to frag_list members terminating on exact same gso_size boundaries. This is verified with a BUG_ON since commit 89319d3801d1 ("net: Add frag_list support to skb_segment"), quote: As such we require all frag_list members terminate on exact MSS boundaries. This is checked using BUG_ON. As there should only be one producer in the kernel of such packets, namely GRO, this requirement should not be difficult to maintain. However, since commit 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper"), the "exact MSS boundaries" assumption no longer holds: An eBPF program using bpf_skb_change_proto() DOES modify 'gso_size', but leaves the frag_list members as originally merged by GRO with the original 'gso_size'. Example of such programs are bpf-based NAT46 or NAT64. This lead to a kernel BUG_ON for flows involving: - GRO generating a frag_list skb - bpf program performing bpf_skb_change_proto() or bpf_skb_adjust_room() - skb_segment() of the skb See example BUG_ON reports in [0]. In commit 13acc94eff12 ("net: permit skb_segment on head_frag frag_list skb"), skb_segment() was modified to support the "gso_size mangling" case of a frag_list GRO'ed skb, but *only* for frag_list members having head_frag==true (having a page-fragment head). Alas, GRO packets having frag_list members with a linear kmalloced head (head_frag==false) still hit the BUG_ON. This commit adds support to skb_segment() for a 'head_skb' packet having a frag_list whose members are *non* head_frag, with gso_size mangled, by disabling SG and thus falling-back to copying the data from the given 'head_skb' into the generated segmented skbs - as suggested by Willem de Bruijn [1]. Since this approach involves the penalty of skb_copy_and_csum_bits() when building the segments, care was taken in order to enable this solution only when required: - untrusted gso_size, by testing SKB_GSO_DODGY is set (SKB_GSO_DODGY is set by any gso_size mangling functions in net/core/filter.c) - the frag_list is non empty, its item is a non head_frag, *and* the headlen of the given 'head_skb' does not match the gso_size. [0] https://lore.kernel.org/netdev/20190826170724.25ff616f@pixies/ https://lore.kernel.org/netdev/9265b93f-253d-6b8c-f2b8-4b54eff1835c@fb.com/ [1] https://lore.kernel.org/netdev/CA+FuTSfVsgNDi7c=GUU8nMg2hWxF2SjCNLXetHeVPdnxAW5K-w@mail.gmail.com/ Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper") Suggested-by: Willem de Bruijn Cc: Daniel Borkmann Cc: Eric Dumazet Cc: Alexander Duyck Signed-off-by: Shmulik Ladkani Reviewed-by: Willem de Bruijn Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/skbuff.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0338820ee0ec..982d8d12830e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3664,6 +3664,25 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int pos; int dummy; + if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) && + (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) { + /* gso_size is untrusted, and we have a frag_list with a linear + * non head_frag head. + * + * (we assume checking the first list_skb member suffices; + * i.e if either of the list_skb members have non head_frag + * head, then the first one has too). + * + * If head_skb's headlen does not fit requested gso_size, it + * means that the frag_list members do NOT terminate on exact + * gso_size boundaries. Hence we cannot perform skb_frag_t page + * sharing. Therefore we must fallback to copying the frag_list + * skbs; we do so by disabling SG. + */ + if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) + features &= ~NETIF_F_SG; + } + __skb_push(head_skb, doffset); proto = skb_network_protocol(head_skb, &dummy); if (unlikely(!proto)) From 28abe579625249161f5de943b8f8713aa8c48452 Mon Sep 17 00:00:00 2001 From: Fred Lotter Date: Fri, 6 Sep 2019 19:29:41 +0200 Subject: [PATCH 105/169] nfp: flower: cmsg rtnl locks can timeout reify messages Flower control message replies are handled in different locations. The truly high priority replies are handled in the BH (tasklet) context, while the remaining replies are handled in a predefined Linux work queue. The work queue handler orders replies into high and low priority groups, and always start servicing the high priority replies within the received batch first. Reply Type: Rtnl Lock: Handler: CMSG_TYPE_PORT_MOD no BH tasklet (mtu) CMSG_TYPE_TUN_NEIGH no BH tasklet CMSG_TYPE_FLOW_STATS no BH tasklet CMSG_TYPE_PORT_REIFY no WQ high CMSG_TYPE_PORT_MOD yes WQ high (link/mtu) CMSG_TYPE_MERGE_HINT yes WQ low CMSG_TYPE_NO_NEIGH no WQ low CMSG_TYPE_ACTIVE_TUNS no WQ low CMSG_TYPE_QOS_STATS no WQ low CMSG_TYPE_LAG_CONFIG no WQ low A subset of control messages can block waiting for an rtnl lock (from both work queue priority groups). The rtnl lock is heavily contended for by external processes such as systemd-udevd, systemd-network and libvirtd, especially during netdev creation, such as when flower VFs and representors are instantiated. Kernel netlink instrumentation shows that external processes (such as systemd-udevd) often use successive rtnl_trylock() sequences, which can result in an rtnl_lock() blocked control message to starve for longer periods of time during rtnl lock contention, i.e. netdev creation. In the current design a single blocked control message will block the entire work queue (both priorities), and introduce a latency which is nondeterministic and dependent on system wide rtnl lock usage. In some extreme cases, one blocked control message at exactly the wrong time, just before the maximum number of VFs are instantiated, can block the work queue for long enough to prevent VF representor REIFY replies from getting handled in time for the 40ms timeout. The firmware will deliver the total maximum number of REIFY message replies in around 300us. Only REIFY and MTU update messages require replies within a timeout period (of 40ms). The MTU-only updates are already done directly in the BH (tasklet) handler. Move the REIFY handler down into the BH (tasklet) in order to resolve timeouts caused by a blocked work queue waiting on rtnl locks. Signed-off-by: Fred Lotter Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index d5bbe3d6048b..05981b54eaab 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -260,9 +260,6 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) type = cmsg_hdr->type; switch (type) { - case NFP_FLOWER_CMSG_TYPE_PORT_REIFY: - nfp_flower_cmsg_portreify_rx(app, skb); - break; case NFP_FLOWER_CMSG_TYPE_PORT_MOD: nfp_flower_cmsg_portmod_rx(app, skb); break; @@ -328,8 +325,7 @@ nfp_flower_queue_ctl_msg(struct nfp_app *app, struct sk_buff *skb, int type) struct nfp_flower_priv *priv = app->priv; struct sk_buff_head *skb_head; - if (type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY || - type == NFP_FLOWER_CMSG_TYPE_PORT_MOD) + if (type == NFP_FLOWER_CMSG_TYPE_PORT_MOD) skb_head = &priv->cmsg_skbs_high; else skb_head = &priv->cmsg_skbs_low; @@ -368,6 +364,10 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb) } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH) { /* Acks from the NFP that the route is added - ignore. */ dev_consume_skb_any(skb); + } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY) { + /* Handle REIFY acks outside wq to prevent RTNL conflict. */ + nfp_flower_cmsg_portreify_rx(app, skb); + dev_consume_skb_any(skb); } else { nfp_flower_queue_ctl_msg(app, skb, cmsg_hdr->type); } From a8e0abae2fe0e788fa3d92c043605d1211c13735 Mon Sep 17 00:00:00 2001 From: Trilok Soni Date: Fri, 6 Sep 2019 12:01:57 -0700 Subject: [PATCH 106/169] Documentation/process: Add Qualcomm process ambassador for hardware security issues Add Trilok Soni as process ambassador for hardware security issues from Qualcomm. Signed-off-by: Trilok Soni Link: https://lore.kernel.org/r/1567796517-8964-1-git-send-email-tsoni@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index c101748bfd92..402636356fbe 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -217,7 +217,7 @@ an involved disclosed party. The current ambassadors list: AMD IBM Intel - Qualcomm + Qualcomm Trilok Soni Microsoft Sasha Levin VMware From fb377eb80c80339b580831a3c0fcce34a4c9d1ad Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 Sep 2019 16:48:38 +0200 Subject: [PATCH 107/169] ipc: fix sparc64 ipc() wrapper Matt bisected a sparc64 specific issue with semctl, shmctl and msgctl to a commit from my y2038 series in linux-5.1, as I missed the custom sys_ipc() wrapper that sparc64 uses in place of the generic version that I patched. The problem is that the sys_{sem,shm,msg}ctl() functions in the kernel now do not allow being called with the IPC_64 flag any more, resulting in a -EINVAL error when they don't recognize the command. Instead, the correct way to do this now is to call the internal ksys_old_{sem,shm,msg}ctl() functions to select the API version. As we generally move towards these functions anyway, change all of sparc_ipc() to consistently use those in place of the sys_*() versions, and move the required ksys_*() declarations into linux/syscalls.h The IS_ENABLED(CONFIG_SYSVIPC) check is required to avoid link errors when ipc is disabled. Reported-by: Matt Turner Fixes: 275f22148e87 ("ipc: rename old-style shmctl/semctl/msgctl syscalls") Cc: stable@vger.kernel.org Tested-by: Matt Turner Tested-by: Anatoly Pugachev Signed-off-by: Arnd Bergmann --- arch/sparc/kernel/sys_sparc_64.c | 33 +++++++++++++++++--------------- include/linux/syscalls.h | 19 ++++++++++++++++++ ipc/util.h | 25 ++---------------------- 3 files changed, 39 insertions(+), 38 deletions(-) diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index ccc88926bc00..9f41a6f5a032 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -336,25 +336,28 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second { long err; + if (!IS_ENABLED(CONFIG_SYSVIPC)) + return -ENOSYS; + /* No need for backward compatibility. We can start fresh... */ if (call <= SEMTIMEDOP) { switch (call) { case SEMOP: - err = sys_semtimedop(first, ptr, - (unsigned int)second, NULL); + err = ksys_semtimedop(first, ptr, + (unsigned int)second, NULL); goto out; case SEMTIMEDOP: - err = sys_semtimedop(first, ptr, (unsigned int)second, + err = ksys_semtimedop(first, ptr, (unsigned int)second, (const struct __kernel_timespec __user *) - (unsigned long) fifth); + (unsigned long) fifth); goto out; case SEMGET: - err = sys_semget(first, (int)second, (int)third); + err = ksys_semget(first, (int)second, (int)third); goto out; case SEMCTL: { - err = sys_semctl(first, second, - (int)third | IPC_64, - (unsigned long) ptr); + err = ksys_old_semctl(first, second, + (int)third | IPC_64, + (unsigned long) ptr); goto out; } default: @@ -365,18 +368,18 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second if (call <= MSGCTL) { switch (call) { case MSGSND: - err = sys_msgsnd(first, ptr, (size_t)second, + err = ksys_msgsnd(first, ptr, (size_t)second, (int)third); goto out; case MSGRCV: - err = sys_msgrcv(first, ptr, (size_t)second, fifth, + err = ksys_msgrcv(first, ptr, (size_t)second, fifth, (int)third); goto out; case MSGGET: - err = sys_msgget((key_t)first, (int)second); + err = ksys_msgget((key_t)first, (int)second); goto out; case MSGCTL: - err = sys_msgctl(first, (int)second | IPC_64, ptr); + err = ksys_old_msgctl(first, (int)second | IPC_64, ptr); goto out; default: err = -ENOSYS; @@ -396,13 +399,13 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second goto out; } case SHMDT: - err = sys_shmdt(ptr); + err = ksys_shmdt(ptr); goto out; case SHMGET: - err = sys_shmget(first, (size_t)second, (int)third); + err = ksys_shmget(first, (size_t)second, (int)third); goto out; case SHMCTL: - err = sys_shmctl(first, (int)second | IPC_64, ptr); + err = ksys_old_shmctl(first, (int)second | IPC_64, ptr); goto out; default: err = -ENOSYS; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 88145da7d140..f7c561c4dcdd 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1402,4 +1402,23 @@ static inline unsigned int ksys_personality(unsigned int personality) return old; } +/* for __ARCH_WANT_SYS_IPC */ +long ksys_semtimedop(int semid, struct sembuf __user *tsops, + unsigned int nsops, + const struct __kernel_timespec __user *timeout); +long ksys_semget(key_t key, int nsems, int semflg); +long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg); +long ksys_msgget(key_t key, int msgflg); +long ksys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); +long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, + long msgtyp, int msgflg); +long ksys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, + int msgflg); +long ksys_shmget(key_t key, size_t size, int shmflg); +long ksys_shmdt(char __user *shmaddr); +long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); +long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, + unsigned int nsops, + const struct old_timespec32 __user *timeout); + #endif diff --git a/ipc/util.h b/ipc/util.h index 0fcf8e719b76..5766c61aed0e 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -276,29 +276,7 @@ static inline int compat_ipc_parse_version(int *cmd) *cmd &= ~IPC_64; return version; } -#endif -/* for __ARCH_WANT_SYS_IPC */ -long ksys_semtimedop(int semid, struct sembuf __user *tsops, - unsigned int nsops, - const struct __kernel_timespec __user *timeout); -long ksys_semget(key_t key, int nsems, int semflg); -long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg); -long ksys_msgget(key_t key, int msgflg); -long ksys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf); -long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, - long msgtyp, int msgflg); -long ksys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, - int msgflg); -long ksys_shmget(key_t key, size_t size, int shmflg); -long ksys_shmdt(char __user *shmaddr); -long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); - -/* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */ -long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems, - unsigned int nsops, - const struct old_timespec32 __user *timeout); -#ifdef CONFIG_COMPAT long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg); long compat_ksys_old_msgctl(int msqid, int cmd, void __user *uptr); long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, @@ -306,6 +284,7 @@ long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, long compat_ksys_msgsnd(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, int msgflg); long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr); -#endif /* CONFIG_COMPAT */ + +#endif #endif From 950b07c14e8c59444e2359f15fd70ed5112e11a0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 7 Sep 2019 14:25:54 -0700 Subject: [PATCH 108/169] Revert "x86/apic: Include the LDR when clearing out APIC registers" This reverts commit 558682b5291937a70748d36fd9ba757fb25b99ae. Chris Wilson reports that it breaks his CPU hotplug test scripts. In particular, it breaks offlining and then re-onlining the boot CPU, which we treat specially (and the BIOS does too). The symptoms are that we can offline the CPU, but it then does not come back online again: smpboot: CPU 0 is now offline smpboot: Booting Node 0 Processor 0 APIC 0x0 smpboot: do_boot_cpu failed(-1) to wakeup CPU#0 Thomas says he knows why it's broken (my personal suspicion: our magic handling of the "cpu0_logical_apicid" thing), but for 5.3 the right fix is to just revert it, since we've never touched the LDR bits before, and it's not worth the risk to do anything else at this stage. [ Hotpluging of the boot CPU is special anyway, and should be off by default. See the "BOOTPARAM_HOTPLUG_CPU0" config option and the cpu0_hotplug kernel parameter. In general you should not do it, and it has various known limitations (hibernate and suspend require the boot CPU, for example). But it should work, even if the boot CPU is special and needs careful treatment - Linus ] Link: https://lore.kernel.org/lkml/156785100521.13300.14461504732265570003@skylake-alporthouse-com/ Reported-by: Chris Wilson Acked-by: Thomas Gleixner Cc: Bandan Das Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/apic.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index dba2828b779a..aa5495d0f478 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1179,10 +1179,6 @@ void clear_local_APIC(void) apic_write(APIC_LVT0, v | APIC_LVT_MASKED); v = apic_read(APIC_LVT1); apic_write(APIC_LVT1, v | APIC_LVT_MASKED); - if (!x2apic_enabled()) { - v = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - apic_write(APIC_LDR, v); - } if (maxlvt >= 4) { v = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); From afa8b475c1aec185a8e106c48b3832e0b88bc2de Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Sun, 8 Sep 2019 00:50:40 +0200 Subject: [PATCH 109/169] x86/timer: Force PIT initialization when !X86_FEATURE_ARAT KVM guests with commit c8c4076723da ("x86/timer: Skip PIT initialization on modern chipsets") applied to guest kernel have been observed to have unusually higher CPU usage with symptoms of increase in vm exits for HLT and MSW_WRITE (MSR_IA32_TSCDEADLINE). This is caused by older QEMUs lacking support for X86_FEATURE_ARAT. lapic clock retains CLOCK_EVT_FEAT_C3STOP and nohz stays inactive. There's no usable broadcast device either. Do the PIT initialization if guest CPU lacks X86_FEATURE_ARAT. On real hardware it shouldn't matter as ARAT and DEADLINE come together. Fixes: c8c4076723da ("x86/timer: Skip PIT initialization on modern chipsets") Signed-off-by: Jan Stancek Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/apic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index dba2828b779a..f91b3ff9dc03 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -834,6 +834,10 @@ bool __init apic_needs_pit(void) if (!boot_cpu_has(X86_FEATURE_APIC)) return true; + /* Virt guests may lack ARAT, but still have DEADLINE */ + if (!boot_cpu_has(X86_FEATURE_ARAT)) + return true; + /* Deadline timer is based on TSC so no further PIT action required */ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) return false; From bfafddd8de426d894fcf3e062370b1efaa195ebc Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 28 Aug 2019 15:55:23 -0700 Subject: [PATCH 110/169] include/linux/compiler.h: fix Oops for Clang-compiled kernels GCC unescapes escaped string section names while Clang does not. Because __section uses the `#` stringification operator for the section name, it doesn't need to be escaped. This fixes an Oops observed in distro's that use systemd and not net.core.bpf_jit_enable=1, when their kernels are compiled with Clang. Link: https://github.com/ClangBuiltLinux/linux/issues/619 Link: https://bugs.llvm.org/show_bug.cgi?id=42950 Link: https://marc.info/?l=linux-netdev&m=156412960619946&w=2 Link: https://lore.kernel.org/lkml/20190904181740.GA19688@gmail.com/ Acked-by: Will Deacon Reported-by: Sedat Dilek Suggested-by: Josh Poimboeuf Tested-by: Sedat Dilek Signed-off-by: Nick Desaulniers [Cherry-picked from the __section cleanup series for 5.3] [Adjusted commit message] Signed-off-by: Miguel Ojeda --- include/linux/compiler.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index f0fd5636fddb..5e88e7e33abe 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -24,7 +24,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, long ______r; \ static struct ftrace_likely_data \ __aligned(4) \ - __section("_ftrace_annotated_branch") \ + __section(_ftrace_annotated_branch) \ ______f = { \ .data.func = __func__, \ .data.file = __FILE__, \ @@ -60,7 +60,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define __trace_if_value(cond) ({ \ static struct ftrace_branch_data \ __aligned(4) \ - __section("_ftrace_branch") \ + __section(_ftrace_branch) \ __if_trace = { \ .func = __func__, \ .file = __FILE__, \ @@ -118,7 +118,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, ".popsection\n\t" /* Annotate a C jump table to allow objtool to follow the code flow */ -#define __annotate_jump_table __section(".rodata..c_jump_table") +#define __annotate_jump_table __section(.rodata..c_jump_table) #else #define annotate_reachable() @@ -298,7 +298,7 @@ unsigned long read_word_at_a_time(const void *addr) * visible to the compiler. */ #define __ADDRESSABLE(sym) \ - static void * __section(".discard.addressable") __used \ + static void * __section(.discard.addressable) __used \ __PASTE(__addressable_##sym, __LINE__) = (void *)&sym; /** From f74c2bb98776e2de508f4d607cd519873065118e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 8 Sep 2019 13:33:15 -0700 Subject: [PATCH 111/169] Linux 5.3-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0cbe8717bdb3..9b08f6383a52 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Bobtail Squid # *DOCUMENTATION* From 59929d3a2eb6c4abafc5b61a20c98aa8728ec378 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 4 Sep 2019 14:18:34 +0000 Subject: [PATCH 112/169] gpio: mockup: add missing single_release() When using single_open() for opening, single_release() should be used instead of seq_release(), otherwise there is a memory leak. Fixes: 2a9e27408e12 ("gpio: mockup: rework debugfs interface") Cc: stable Signed-off-by: Wei Yongjun Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mockup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index f1a9c0544e3f..213aedc97dc2 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -309,6 +309,7 @@ static const struct file_operations gpio_mockup_debugfs_ops = { .read = gpio_mockup_debugfs_read, .write = gpio_mockup_debugfs_write, .llseek = no_llseek, + .release = single_release, }; static void gpio_mockup_debugfs_setup(struct device *dev, From e95fbc130a162ba9ad956311b95aa0da269eea48 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 9 Sep 2019 03:22:18 +0000 Subject: [PATCH 113/169] gpio: fix line flag validation in linehandle_create linehandle_create should not allow both GPIOHANDLE_REQUEST_INPUT and GPIOHANDLE_REQUEST_OUTPUT to be set. Fixes: d7c51b47ac11 ("gpio: userspace ABI for reading/writing GPIO lines") Cc: stable Signed-off-by: Kent Gibson Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index cca749010cd0..7502230a4ec8 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -535,6 +535,14 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) if (lflags & ~GPIOHANDLE_REQUEST_VALID_FLAGS) return -EINVAL; + /* + * Do not allow both INPUT & OUTPUT flags to be set as they are + * contradictory. + */ + if ((lflags & GPIOHANDLE_REQUEST_INPUT) && + (lflags & GPIOHANDLE_REQUEST_OUTPUT)) + return -EINVAL; + /* * Do not allow OPEN_SOURCE & OPEN_DRAIN flags in a single request. If * the hardware actually supports enabling both at the same time the From 5ca2f54b597c816df54ff1b28eb99cf7262b955d Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 9 Sep 2019 03:24:06 +0000 Subject: [PATCH 114/169] gpio: fix line flag validation in lineevent_create lineevent_create should not allow any of GPIOHANDLE_REQUEST_OUTPUT, GPIOHANDLE_REQUEST_OPEN_DRAIN or GPIOHANDLE_REQUEST_OPEN_SOURCE to be set. Fixes: d7c51b47ac11 ("gpio: userspace ABI for reading/writing GPIO lines") Cc: stable Signed-off-by: Kent Gibson Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 7502230a4ec8..d9074191edef 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -934,7 +934,9 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) } /* This is just wrong: we don't look for events on output lines */ - if (lflags & GPIOHANDLE_REQUEST_OUTPUT) { + if ((lflags & GPIOHANDLE_REQUEST_OUTPUT) || + (lflags & GPIOHANDLE_REQUEST_OPEN_DRAIN) || + (lflags & GPIOHANDLE_REQUEST_OPEN_SOURCE)) { ret = -EINVAL; goto out_free_label; } @@ -948,10 +950,6 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) if (lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW) set_bit(FLAG_ACTIVE_LOW, &desc->flags); - if (lflags & GPIOHANDLE_REQUEST_OPEN_DRAIN) - set_bit(FLAG_OPEN_DRAIN, &desc->flags); - if (lflags & GPIOHANDLE_REQUEST_OPEN_SOURCE) - set_bit(FLAG_OPEN_SOURCE, &desc->flags); ret = gpiod_direction_input(desc); if (ret) From bb1a71f9c4672fbfcf2158fd57d0c5c0cdae5612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 28 Aug 2019 13:20:59 +0300 Subject: [PATCH 115/169] drm/i915: Limit MST to <= 8bpc once again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My attempt at allowing MST to use the higher color depths has regressed some configurations. Apparently people have setups where all MST streams will fit into the DP link with 8bpc but won't fit with higher color depths. What we really should be doing is reducing the bpc for all the streams on the same link until they start to fit. But that requires a bit more work, so in the meantime let's revert back closer to the old behavior and limit MST to at most 8bpc. Cc: stable@vger.kernel.org Cc: Lyude Paul Tested-by: Geoffrey Bennett Fixes: f1477219869c ("drm/i915: Remove the 8bpc shackles from DP MST") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111505 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190828102059.2512-1-ville.syrjala@linux.intel.com Reviewed-by: Lyude Paul (cherry picked from commit 75427b2a2bffc083d51dec389c235722a9c69b05) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 18e4cba76720..8aa6a31e8ad0 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -128,7 +128,15 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, limits.max_lane_count = intel_dp_max_lane_count(intel_dp); limits.min_bpp = intel_dp_min_bpp(pipe_config); - limits.max_bpp = pipe_config->pipe_bpp; + /* + * FIXME: If all the streams can't fit into the link with + * their current pipe_bpp we should reduce pipe_bpp across + * the board until things start to fit. Until then we + * limit to <= 8bpc since that's what was hardcoded for all + * MST streams previously. This hack should be removed once + * we have the proper retry logic in place. + */ + limits.max_bpp = min(pipe_config->pipe_bpp, 24); intel_dp_adjust_compliance_config(intel_dp, pipe_config, &limits); From 2eb0964eec5f1d99f9eaf4963eee267acc72b615 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Sep 2019 11:07:07 +0100 Subject: [PATCH 116/169] drm/i915: Restore relaxed padding (OCL_OOB_SUPPRES_ENABLE) for skl+ This bit was fliped on for "syncing dependencies between camera and graphics". BSpec has no recollection why, and it is causing unrecoverable GPU hangs with Vulkan compute workloads. From BSpec, setting bit5 to 0 enables relaxed padding requirements for buffers, 1D and 2D non-array, non-MSAA, non-mip-mapped linear surfaces; and *must* be set to 0h on skl+ to ensure "Out of Bounds" case is suppressed. Reported-by: Jason Ekstrand Suggested-by: Jason Ekstrand Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110998 Fixes: 8424171e135c ("drm/i915/gen9: h/w w/a: syncing dependencies between camera and graphics") Signed-off-by: Chris Wilson Tested-by: denys.kostin@globallogic.com Cc: Jason Ekstrand Cc: Mika Kuoppala Cc: # v4.1+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190904100707.7377-1-chris@chris-wilson.co.uk (cherry picked from commit 9d7b01e93526efe79dbf75b69cc5972b5a4f7b37) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 98dfb086320f..99e8242194c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -308,11 +308,6 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, FLOW_CONTROL_ENABLE | PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - if (!IS_COFFEELAKE(i915)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, From cf8f1696709ad5bb3138ed8c771c2eb98950cd8a Mon Sep 17 00:00:00 2001 From: Matthias Lange Date: Fri, 6 Sep 2019 16:59:01 +0200 Subject: [PATCH 117/169] virtio_ring: fix unmap of indirect descriptors The function virtqueue_add_split() DMA-maps the scatterlist buffers. In case a mapping error occurs the already mapped buffers must be unmapped. This happens by jumping to the 'unmap_release' label. In case of indirect descriptors the release is wrong and may leak kernel memory. Because the implementation assumes that the head descriptor is already mapped it starts iterating over the descriptor list starting from the head descriptor. However for indirect descriptors the head descriptor is never mapped in case of an error. The fix is to initialize the start index with zero in case of indirect descriptors and use the 'desc' pointer directly for iterating over the descriptor chain. Signed-off-by: Matthias Lange Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index c8be1c4f5b55..bdc08244a648 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -566,13 +566,17 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, unmap_release: err_idx = i; - i = head; + + if (indirect) + i = 0; + else + i = head; for (n = 0; n < total_sg; n++) { if (i == err_idx) break; vring_unmap_one_split(vq, &desc[i]); - i = virtio16_to_cpu(_vq->vdev, vq->split.vring.desc[i].next); + i = virtio16_to_cpu(_vq->vdev, desc[i].next); } if (indirect) From 21670bd78a25001cf8ef2679b378c73fb73b904f Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Sat, 7 Sep 2019 19:48:00 -0700 Subject: [PATCH 118/169] drm/lima: fix lima_gem_wait() return value drm_gem_reservation_object_wait() returns 0 if it succeeds and -ETIME if it timeouts, but lima driver assumed that 0 is error. Cc: stable@vger.kernel.org Fixes: a1d2a6339961e ("drm/lima: driver for ARM Mali4xx GPUs") Signed-off-by: Vasily Khoruzhick Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20190908024800.23229-1-anarsoul@gmail.com --- drivers/gpu/drm/lima/lima_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c index 477c0f766663..b609dc030d6c 100644 --- a/drivers/gpu/drm/lima/lima_gem.c +++ b/drivers/gpu/drm/lima/lima_gem.c @@ -342,7 +342,7 @@ int lima_gem_wait(struct drm_file *file, u32 handle, u32 op, s64 timeout_ns) timeout = drm_timeout_abs_to_jiffies(timeout_ns); ret = drm_gem_reservation_object_wait(file, handle, write, timeout); - if (ret == 0) + if (ret == -ETIME) ret = timeout ? -ETIMEDOUT : -EBUSY; return ret; From c8dc55956b09b53ccffceb6e3146981210e27821 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Mon, 9 Sep 2019 22:44:51 +0200 Subject: [PATCH 119/169] net/ibmvnic: Fix missing { in __ibmvnic_reset Commit 1c2977c09499 ("net/ibmvnic: free reset work of removed device from queue") adds a } without corresponding { causing build break. Fixes: 1c2977c09499 ("net/ibmvnic: free reset work of removed device from queue") Signed-off-by: Michal Suchanek Reviewed-by: Tyrel Datwyler Reviewed-by: Juliet Kim Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 6644cabc8e75..5cb55ea671e3 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1984,7 +1984,7 @@ static void __ibmvnic_reset(struct work_struct *work) rwi = get_next_rwi(adapter); while (rwi) { if (adapter->state == VNIC_REMOVING || - adapter->state == VNIC_REMOVED) + adapter->state == VNIC_REMOVED) { kfree(rwi); rc = EBUSY; break; From 94a72b3f024fc7e9ab640897a1e38583a470659d Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 6 Sep 2019 11:47:02 +0200 Subject: [PATCH 120/169] bridge/mdb: remove wrong use of NLM_F_MULTI NLM_F_MULTI must be used only when a NLMSG_DONE message is sent at the end. In fact, NLMSG_DONE is sent only at the end of a dump. Libraries like libnl will wait forever for NLMSG_DONE. Fixes: 949f1e39a617 ("bridge: mdb: notify on router port add and del") CC: Nikolay Aleksandrov Signed-off-by: Nicolas Dichtel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index bf6acd34234d..63f9c08625f0 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -437,7 +437,7 @@ static int nlmsg_populate_rtr_fill(struct sk_buff *skb, struct nlmsghdr *nlh; struct nlattr *nest; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0); if (!nlh) return -EMSGSIZE; From 1dea33e84d33a5220aeda4ca9e918de0fd77c078 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 3 Sep 2019 16:18:56 -0700 Subject: [PATCH 121/169] gpiolib: of: fix fallback quirks handling We should only try to execute fallback quirks handling when previous call returned -ENOENT, and not when we did not get -EPROBE_DEFER. The other errors should be treated as hard errors: we did find the GPIO description, but for some reason we failed to handle it properly. The fallbacks should only be executed when previous handlers returned -ENOENT, which means the mapping/description was not found. Also let's remove the explicit deferral handling when iterating through GPIO suffixes: it is not needed anymore as we will not be calling fallbacks for anything but -ENOENT. Fixes: df451f83e1fc ("gpio: of: fix Freescale SPI CS quirk handling") Signed-off-by: Dmitry Torokhov Link: https://lore.kernel.org/r/20190903231856.GA165165@dtor-ws Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 9762dd6d99fa..9b44c49a9227 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -343,36 +343,27 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx, &of_flags); - /* - * -EPROBE_DEFER in our case means that we found a - * valid GPIO property, but no controller has been - * registered so far. - * - * This means we don't need to look any further for - * alternate name conventions, and we should really - * preserve the return code for our user to be able to - * retry probing later. - */ - if (IS_ERR(desc) && PTR_ERR(desc) == -EPROBE_DEFER) - return desc; - if (!IS_ERR(desc) || (PTR_ERR(desc) != -ENOENT)) + if (!IS_ERR(desc) || PTR_ERR(desc) != -ENOENT) break; } - /* Special handling for SPI GPIOs if used */ - if (IS_ERR(desc)) + if (IS_ERR(desc) && PTR_ERR(desc) == -ENOENT) { + /* Special handling for SPI GPIOs if used */ desc = of_find_spi_gpio(dev, con_id, &of_flags); - if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER) { + } + + if (IS_ERR(desc) && PTR_ERR(desc) == -ENOENT) { /* This quirk looks up flags and all */ desc = of_find_spi_cs_gpio(dev, con_id, idx, flags); if (!IS_ERR(desc)) return desc; } - /* Special handling for regulator GPIOs if used */ - if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER) + if (IS_ERR(desc) && PTR_ERR(desc) == -ENOENT) { + /* Special handling for regulator GPIOs if used */ desc = of_find_regulator_gpio(dev, con_id, &of_flags); + } if (IS_ERR(desc)) return desc; From 8b142a00edcf8422ca48b8de88d286efb500cb53 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 8 Sep 2019 12:11:23 -0700 Subject: [PATCH 122/169] net_sched: check cops->tcf_block in tc_bind_tclass() At least sch_red and sch_tbf don't implement ->tcf_block() while still have a non-zero tc "class". Instead of adding nop implementations to each of such qdisc's, we can just relax the check of cops->tcf_block() in tc_bind_tclass(). They don't support TC filter anyway. Reported-by: syzbot+21b29db13c065852f64b@syzkaller.appspotmail.com Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 04faee7ccbce..1047825d9f48 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1920,6 +1920,8 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, cl = cops->find(q, portid); if (!cl) return; + if (!cops->tcf_block) + return; block = cops->tcf_block(q, cl, NULL); if (!block) return; From d4d6ec6dac07f263f06d847d6f732d6855522845 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 8 Sep 2019 13:40:51 -0700 Subject: [PATCH 123/169] sch_hhf: ensure quantum and hhf_non_hh_weight are non-zero In case of TCA_HHF_NON_HH_WEIGHT or TCA_HHF_QUANTUM is zero, it would make no progress inside the loop in hhf_dequeue() thus kernel would get stuck. Fix this by checking this corner case in hhf_change(). Fixes: 10239edf86f1 ("net-qdisc-hhf: Heavy-Hitter Filter (HHF) qdisc") Reported-by: syzbot+bc6297c11f19ee807dc2@syzkaller.appspotmail.com Reported-by: syzbot+041483004a7f45f1f20a@syzkaller.appspotmail.com Reported-by: syzbot+55be5f513bed37fc4367@syzkaller.appspotmail.com Cc: Jamal Hadi Salim Cc: Jiri Pirko Cc: Terry Lam Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_hhf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index cee6971c1c82..23cd1c873a2c 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -531,7 +531,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt, new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]); non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight; - if (non_hh_quantum > INT_MAX) + if (non_hh_quantum == 0 || non_hh_quantum > INT_MAX) return -EINVAL; sch_tree_lock(sch); From f794dc2304d83ab998c2eee5bab0549aff5c53a2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 9 Sep 2019 15:33:29 +0800 Subject: [PATCH 124/169] sctp: fix the missing put_user when dumping transport thresholds This issue causes SCTP_PEER_ADDR_THLDS sockopt not to be able to dump a transport thresholds info. Fix it by adding 'goto' put_user in sctp_getsockopt_paddr_thresholds. Fixes: 8add543e369d ("sctp: add SCTP_FUTURE_ASSOC for SCTP_PEER_ADDR_THLDS sockopt") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9d1f83b10c0a..ad8751891b00 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7173,7 +7173,7 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk, val.spt_pathmaxrxt = trans->pathmaxrxt; val.spt_pathpfthld = trans->pf_retrans; - return 0; + goto out; } asoc = sctp_id2assoc(sk, val.spt_assoc_id); @@ -7191,6 +7191,7 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk, val.spt_pathmaxrxt = sp->pathmaxrxt; } +out: if (put_user(len, optlen) || copy_to_user(optval, &val, len)) return -EFAULT; From 2507e6ab7a9a440773be476141a255934468c5ef Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Tue, 10 Sep 2019 18:01:40 -0500 Subject: [PATCH 125/169] wimax: i2400: fix memory leak In i2400m_op_rfkill_sw_toggle cmd buffer should be released along with skb response. Signed-off-by: Navid Emamdoost Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/op-rfkill.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wimax/i2400m/op-rfkill.c b/drivers/net/wimax/i2400m/op-rfkill.c index 6642bcb27761..8efb493ceec2 100644 --- a/drivers/net/wimax/i2400m/op-rfkill.c +++ b/drivers/net/wimax/i2400m/op-rfkill.c @@ -127,6 +127,7 @@ int i2400m_op_rfkill_sw_toggle(struct wimax_dev *wimax_dev, "%d\n", result); result = 0; error_cmd: + kfree(cmd); kfree_skb(ack_skb); error_msg_to_dev: error_alloc: From 49f6c90bf6805948b597eabb499e500a47cf24be Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Wed, 11 Sep 2019 09:36:23 +0800 Subject: [PATCH 126/169] net: sonic: replace dev_kfree_skb in sonic_send_packet sonic_send_packet will be processed in irq or non-irq context, so it would better use dev_kfree_skb_any instead of dev_kfree_skb. Fixes: d9fb9f384292 ("*sonic/natsemi/ns83829: Move the National Semi-conductor drivers") Signed-off-by: Mao Wenan Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/sonic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 18fd62fbfb64..b339125b2f09 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -233,7 +233,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) laddr = dma_map_single(lp->device, skb->data, length, DMA_TO_DEVICE); if (!laddr) { pr_err_ratelimited("%s: failed to map tx DMA buffer.\n", dev->name); - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } From 3dfdecc6d1256835ac0612b2a8f7595466d66327 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 9 Sep 2019 14:54:21 -0700 Subject: [PATCH 127/169] lib/Kconfig: fix OBJAGG in lib/ menu structure Keep the "Library routines" menu intact by moving OBJAGG into it. Otherwise OBJAGG is displayed/presented as an orphan in the various config menus. Fixes: 0a020d416d0a ("lib: introduce initial implementation of object aggregation manager") Signed-off-by: Randy Dunlap Cc: Jiri Pirko Cc: Ido Schimmel Cc: David S. Miller Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- lib/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Kconfig b/lib/Kconfig index f33d66fc0e86..4e6b1c3e4c98 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -631,6 +631,9 @@ config SBITMAP config PARMAN tristate "parman" if COMPILE_TEST +config OBJAGG + tristate "objagg" if COMPILE_TEST + config STRING_SELFTEST tristate "Test string functions" @@ -653,6 +656,3 @@ config GENERIC_LIB_CMPDI2 config GENERIC_LIB_UCMPDI2 bool - -config OBJAGG - tristate "objagg" if COMPILE_TEST From 61f7f7c8f978b1c0d80e43c83b7d110ca0496eb4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 27 Aug 2019 22:28:35 +0200 Subject: [PATCH 128/169] gpiolib: acpi: Add gpiolib_acpi_run_edge_events_on_boot option and blacklist Another day; another DSDT bug we need to workaround... Since commit ca876c7483b6 ("gpiolib-acpi: make sure we trigger edge events at least once on boot") we call _AEI edge handlers at boot. In some rare cases this causes problems. One example of this is the Minix Neo Z83-4 mini PC, this device has a clear DSDT bug where it has some copy and pasted code for dealing with Micro USB-B connector host/device role switching, while the mini PC does not even have a micro-USB connector. This code, which should not be there, messes with the DDC data pin from the HDMI connector (switching it to GPIO mode) breaking HDMI support. To avoid problems like this, this commit adds a new gpiolib_acpi.run_edge_events_on_boot kernel commandline option, which allows disabling the running of _AEI edge event handlers at boot. The default value is -1/auto which uses a DMI based blacklist, the initial version of this blacklist contains the Neo Z83-4 fixing the HDMI breakage. Cc: stable@vger.kernel.org Cc: Daniel Drake Cc: Ian W MORRISON Reported-by: Ian W MORRISON Suggested-by: Ian W MORRISON Fixes: ca876c7483b6 ("gpiolib-acpi: make sure we trigger edge events at least once on boot") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20190827202835.213456-1-hdegoede@redhat.com Acked-by: Mika Westerberg Reviewed-by: Andy Shevchenko Tested-by: Ian W MORRISON Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 42 +++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 39f2f9035c11..bda28eb82c3f 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -7,6 +7,7 @@ * Mika Westerberg */ +#include #include #include #include @@ -19,6 +20,11 @@ #include "gpiolib.h" +static int run_edge_events_on_boot = -1; +module_param(run_edge_events_on_boot, int, 0444); +MODULE_PARM_DESC(run_edge_events_on_boot, + "Run edge _AEI event-handlers at boot: 0=no, 1=yes, -1=auto"); + /** * struct acpi_gpio_event - ACPI GPIO event handler data * @@ -170,10 +176,13 @@ static void acpi_gpiochip_request_irq(struct acpi_gpio_chip *acpi_gpio, event->irq_requested = true; /* Make sure we trigger the initial state of edge-triggered IRQs */ - value = gpiod_get_raw_value_cansleep(event->desc); - if (((event->irqflags & IRQF_TRIGGER_RISING) && value == 1) || - ((event->irqflags & IRQF_TRIGGER_FALLING) && value == 0)) - event->handler(event->irq, event); + if (run_edge_events_on_boot && + (event->irqflags & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING))) { + value = gpiod_get_raw_value_cansleep(event->desc); + if (((event->irqflags & IRQF_TRIGGER_RISING) && value == 1) || + ((event->irqflags & IRQF_TRIGGER_FALLING) && value == 0)) + event->handler(event->irq, event); + } } static void acpi_gpiochip_request_irqs(struct acpi_gpio_chip *acpi_gpio) @@ -1283,3 +1292,28 @@ static int acpi_gpio_handle_deferred_request_irqs(void) } /* We must use _sync so that this runs after the first deferred_probe run */ late_initcall_sync(acpi_gpio_handle_deferred_request_irqs); + +static const struct dmi_system_id run_edge_events_on_boot_blacklist[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MINIX"), + DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"), + } + }, + {} /* Terminating entry */ +}; + +static int acpi_gpio_setup_params(void) +{ + if (run_edge_events_on_boot < 0) { + if (dmi_check_system(run_edge_events_on_boot_blacklist)) + run_edge_events_on_boot = 0; + else + run_edge_events_on_boot = 1; + } + + return 0; +} + +/* Directly after dmi_setup() which runs as core_initcall() */ +postcore_initcall(acpi_gpio_setup_params); From aea64b583601aa5e0d6ea51a0420e46e43710bd4 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 8 Sep 2019 09:45:52 +0200 Subject: [PATCH 129/169] Revert "mmc: bcm2835: Terminate timeout work synchronously" The commit 37fefadee8bb ("mmc: bcm2835: Terminate timeout work synchronously") causes lockups in case of hardware timeouts due the timeout work also calling cancel_delayed_work_sync() on its own. So revert it. Fixes: 37fefadee8bb ("mmc: bcm2835: Terminate timeout work synchronously") Cc: stable@vger.kernel.org Signed-off-by: Stefan Wahren Signed-off-by: Ulf Hansson --- drivers/mmc/host/bcm2835.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c index 7e0d3a49c06d..bb31e13648d6 100644 --- a/drivers/mmc/host/bcm2835.c +++ b/drivers/mmc/host/bcm2835.c @@ -597,7 +597,7 @@ static void bcm2835_finish_request(struct bcm2835_host *host) struct dma_chan *terminate_chan = NULL; struct mmc_request *mrq; - cancel_delayed_work_sync(&host->timeout_work); + cancel_delayed_work(&host->timeout_work); mrq = host->mrq; From 49baa01c8b99ef92958e18fb58ebeb5dfdcde8af Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 5 Sep 2019 12:55:57 +0800 Subject: [PATCH 130/169] Revert "mmc: sdhci: Remove unneeded quirk2 flag of O2 SD host controller" This reverts commit 414126f9e5abf1973c661d24229543a9458fa8ce. This commit broke eMMC storage access on a new consumer MiniPC based on AMD SoC, which has eMMC connected to: 02:00.0 SD Host controller: O2 Micro, Inc. Device 8620 (rev 01) (prog-if 01) Subsystem: O2 Micro, Inc. Device 0002 During probe, several errors are seen including: mmc1: Got data interrupt 0x02000000 even though no data operation was in progress. mmc1: Timeout waiting for hardware interrupt. mmc1: error -110 whilst initialising MMC card Reverting this commit allows the eMMC storage to be detected & usable again. Signed-off-by: Daniel Drake Fixes: 414126f9e5ab ("mmc: sdhci: Remove unneeded quirk2 flag of O2 SD host controller") Cc: stable@vger.kernel.org # v5.1+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-o2micro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index 9dc4548271b4..19944b0049db 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -432,7 +432,6 @@ int sdhci_pci_o2_probe_slot(struct sdhci_pci_slot *slot) mmc_hostname(host->mmc)); host->flags &= ~SDHCI_SIGNALING_330; host->flags |= SDHCI_SIGNALING_180; - host->quirks2 |= SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD; host->mmc->caps2 |= MMC_CAP2_NO_SD; host->mmc->caps2 |= MMC_CAP2_NO_SDIO; pci_write_config_dword(chip->pdev, @@ -682,6 +681,7 @@ static const struct sdhci_ops sdhci_pci_o2_ops = { const struct sdhci_pci_fixes sdhci_o2 = { .probe = sdhci_pci_o2_probe, .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC, + .quirks2 = SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD, .probe_slot = sdhci_pci_o2_probe_slot, #ifdef CONFIG_PM_SLEEP .resume = sdhci_pci_o2_resume, From 3e493173b7841259a08c5c8e5cbe90adb349da7e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 11 Sep 2019 16:03:05 +0300 Subject: [PATCH 131/169] mac80211: Do not send Layer 2 Update frame before authorization The Layer 2 Update frame is used to update bridges when a station roams to another AP even if that STA does not transmit any frames after the reassociation. This behavior was described in IEEE Std 802.11F-2003 as something that would happen based on MLME-ASSOCIATE.indication, i.e., before completing 4-way handshake. However, this IEEE trial-use recommended practice document was published before RSN (IEEE Std 802.11i-2004) and as such, did not consider RSN use cases. Furthermore, IEEE Std 802.11F-2003 was withdrawn in 2006 and as such, has not been maintained amd should not be used anymore. Sending out the Layer 2 Update frame immediately after association is fine for open networks (and also when using SAE, FT protocol, or FILS authentication when the station is actually authenticated by the time association completes). However, it is not appropriate for cases where RSN is used with PSK or EAP authentication since the station is actually fully authenticated only once the 4-way handshake completes after authentication and attackers might be able to use the unauthenticated triggering of Layer 2 Update frame transmission to disrupt bridge behavior. Fix this by postponing transmission of the Layer 2 Update frame from station entry addition to the point when the station entry is marked authorized. Similarly, send out the VLAN binding update only if the STA entry has already been authorized. Signed-off-by: Jouni Malinen Reviewed-by: Johannes Berg Signed-off-by: David S. Miller --- net/mac80211/cfg.c | 14 ++++---------- net/mac80211/sta_info.c | 4 ++++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 111c400199ec..4105c97c7ba1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1529,7 +1529,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; struct ieee80211_sub_if_data *sdata; int err; - int layer2_update; if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); @@ -1573,18 +1572,12 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, test_sta_flag(sta, WLAN_STA_ASSOC)) rate_control_rate_init(sta); - layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - sdata->vif.type == NL80211_IFTYPE_AP; - err = sta_info_insert_rcu(sta); if (err) { rcu_read_unlock(); return err; } - if (layer2_update) - cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); - rcu_read_unlock(); return 0; @@ -1682,10 +1675,11 @@ static int ieee80211_change_station(struct wiphy *wiphy, sta->sdata = vlansdata; ieee80211_check_fast_xmit(sta); - if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { ieee80211_vif_inc_num_mcast(sta->sdata); - - cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); + cfg80211_send_layer2_update(sta->sdata->dev, + sta->sta.addr); + } } err = sta_apply_parameters(local, sta, params); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 95eb8220e2e4..5fb368cc2633 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1979,6 +1979,10 @@ int sta_info_move_state(struct sta_info *sta, ieee80211_check_fast_xmit(sta); ieee80211_check_fast_rx(sta); } + if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sta->sdata->vif.type == NL80211_IFTYPE_AP) + cfg80211_send_layer2_update(sta->sdata->dev, + sta->sta.addr); break; default: break; From c5c1a030a7dbf8dd4e1fa4405ae9a89dc1d2a8db Mon Sep 17 00:00:00 2001 From: Ka-Cheong Poon Date: Wed, 11 Sep 2019 02:58:05 -0700 Subject: [PATCH 132/169] net/rds: An rds_sock is added too early to the hash table In rds_bind(), an rds_sock is added to the RDS bind hash table before rs_transport is set. This means that the socket can be found by the receive code path when rs_transport is NULL. And the receive code path de-references rs_transport for congestion update check. This can cause a panic. An rds_sock should not be added to the bind hash table before all the needed fields are set. Reported-by: syzbot+4b4f8163c2e246df3c4c@syzkaller.appspotmail.com Signed-off-by: Ka-Cheong Poon Signed-off-by: David S. Miller --- net/rds/bind.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/net/rds/bind.c b/net/rds/bind.c index 0f4398e7f2a7..05464fd7c17a 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -239,34 +239,30 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - sock_set_flag(sk, SOCK_RCU_FREE); - ret = rds_add_bound(rs, binding_addr, &port, scope_id); - if (ret) - goto out; - - if (rs->rs_transport) { /* previously bound */ + /* The transport can be set using SO_RDS_TRANSPORT option before the + * socket is bound. + */ + if (rs->rs_transport) { trans = rs->rs_transport; if (trans->laddr_check(sock_net(sock->sk), binding_addr, scope_id) != 0) { ret = -ENOPROTOOPT; - rds_remove_bound(rs); - } else { - ret = 0; + goto out; } - goto out; - } - trans = rds_trans_get_preferred(sock_net(sock->sk), binding_addr, - scope_id); - if (!trans) { - ret = -EADDRNOTAVAIL; - rds_remove_bound(rs); - pr_info_ratelimited("RDS: %s could not find a transport for %pI6c, load rds_tcp or rds_rdma?\n", - __func__, binding_addr); - goto out; + } else { + trans = rds_trans_get_preferred(sock_net(sock->sk), + binding_addr, scope_id); + if (!trans) { + ret = -EADDRNOTAVAIL; + pr_info_ratelimited("RDS: %s could not find a transport for %pI6c, load rds_tcp or rds_rdma?\n", + __func__, binding_addr); + goto out; + } + rs->rs_transport = trans; } - rs->rs_transport = trans; - ret = 0; + sock_set_flag(sk, SOCK_RCU_FREE); + ret = rds_add_bound(rs, binding_addr, &port, scope_id); out: release_sock(sk); From 90aa11f1bc5dbb0c392775ed63ced23a3873bcd2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Sep 2019 11:38:48 +0100 Subject: [PATCH 133/169] NFC: st95hf: fix spelling mistake "receieve" -> "receive" There is a spelling mistake in a dev_err message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/nfc/st95hf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c index e42850095892..7eda62a9e0df 100644 --- a/drivers/nfc/st95hf/core.c +++ b/drivers/nfc/st95hf/core.c @@ -316,7 +316,7 @@ static int st95hf_echo_command(struct st95hf_context *st95context) &echo_response); if (result) { dev_err(&st95context->spicontext.spidev->dev, - "err: echo response receieve error = 0x%x\n", result); + "err: echo response receive error = 0x%x\n", result); return result; } From b93fb20f016c057d8a48c4ad62d493dfc5096d0c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Sep 2019 12:37:34 +0100 Subject: [PATCH 134/169] net: lmc: fix spelling mistake "runnin" -> "running" There is a spelling mistake in the lmc_trace message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/wan/lmc/lmc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index d74349628db2..0e6a51525d91 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -1115,7 +1115,7 @@ static void lmc_running_reset (struct net_device *dev) /*fold00*/ sc->lmc_cmdmode |= (TULIP_CMD_TXRUN | TULIP_CMD_RXRUN); LMC_CSR_WRITE (sc, csr_command, sc->lmc_cmdmode); - lmc_trace(dev, "lmc_runnin_reset_out"); + lmc_trace(dev, "lmc_running_reset_out"); } From c3dc1fa72249e4472b90ecef4dbafe25f0f07889 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Sep 2019 15:08:16 +0100 Subject: [PATCH 135/169] net: hns3: fix spelling mistake "undeflow" -> "underflow" There is a spelling mistake in a .msg literal string. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 0a7243825e7b..85a3b062371d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -98,7 +98,7 @@ static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = { .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow", .reset_level = HNAE3_GLOBAL_RESET }, - { .int_msk = BIT(2), .msg = "rx_stp_fifo_undeflow", + { .int_msk = BIT(2), .msg = "rx_stp_fifo_underflow", .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(3), .msg = "tx_buf_overflow", .reset_level = HNAE3_GLOBAL_RESET }, From f4b752a6b2708bfdf7fbe8a241082c8104f4ce05 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 11 Sep 2019 15:18:11 +0100 Subject: [PATCH 136/169] mlx4: fix spelling mistake "veify" -> "verify" There is a spelling mistake in a mlx4_err error message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 1f6e16d5ea6b..309470ec0219 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2240,7 +2240,7 @@ static int mlx4_validate_optimized_steering(struct mlx4_dev *dev) for (i = 1; i <= dev->caps.num_ports; i++) { if (mlx4_dev_port(dev, i, &port_cap)) { mlx4_err(dev, - "QUERY_DEV_CAP command failed, can't veify DMFS high rate steering.\n"); + "QUERY_DEV_CAP command failed, can't verify DMFS high rate steering.\n"); } else if ((dev->caps.dmfs_high_steer_mode != MLX4_STEERING_DMFS_A0_DEFAULT) && (port_cap.dmfs_optimized_state == From 377228accbbb8b9738f615d791aa803f41c067e0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 4 Sep 2019 08:07:11 -0700 Subject: [PATCH 137/169] ixgbe: Prevent u8 wrapping of ITR value to something less than 10us There were a couple cases where the ITR value generated via the adaptive ITR scheme could exceed 126. This resulted in the value becoming either 0 or something less than 10. Switching back and forth between a value less than 10 and a value greater than 10 can cause issues as certain hardware features such as RSC to not function well when the ITR value has dropped that low. CC: stable@vger.kernel.org Fixes: b4ded8327fea ("ixgbe: Update adaptive ITR algorithm") Reported-by: Gregg Leventhal Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7882148abb43..77ca9005dc41 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2621,7 +2621,7 @@ static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, /* 16K ints/sec to 9.2K ints/sec */ avg_wire_size *= 15; avg_wire_size += 11452; - } else if (avg_wire_size <= 1980) { + } else if (avg_wire_size < 1968) { /* 9.2K ints/sec to 8K ints/sec */ avg_wire_size *= 5; avg_wire_size += 22420; @@ -2654,6 +2654,8 @@ static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, case IXGBE_LINK_SPEED_2_5GB_FULL: case IXGBE_LINK_SPEED_1GB_FULL: case IXGBE_LINK_SPEED_10_FULL: + if (avg_wire_size > 8064) + avg_wire_size = 8064; itr += DIV_ROUND_UP(avg_wire_size, IXGBE_ITR_ADAPTIVE_MIN_INC * 64) * IXGBE_ITR_ADAPTIVE_MIN_INC; From bf280c0387ebbf8eebad1036fca8f7b85ebfde32 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 22 Aug 2019 20:12:37 +0300 Subject: [PATCH 138/169] ixgbe: fix double clean of Tx descriptors with xdp Tx code doesn't clear the descriptors' status after cleaning. So, if the budget is larger than number of used elems in a ring, some descriptors will be accounted twice and xsk_umem_complete_tx will move prod_tail far beyond the prod_head breaking the completion queue ring. Fix that by limiting the number of descriptors to clean by the number of used descriptors in the Tx ring. 'ixgbe_clean_xdp_tx_irq()' function refactored to look more like 'ixgbe_xsk_clean_tx_ring()' since we're allowed to directly use 'next_to_clean' and 'next_to_use' indexes. CC: stable@vger.kernel.org Fixes: 8221c5eba8c1 ("ixgbe: add AF_XDP zero-copy Tx support") Signed-off-by: Ilya Maximets Tested-by: William Tu Tested-by: Eelco Chaudron Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 29 ++++++++------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 6b609553329f..a3b6d8c89127 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -633,19 +633,17 @@ static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring, bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, struct ixgbe_ring *tx_ring, int napi_budget) { + u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use; unsigned int total_packets = 0, total_bytes = 0; - u32 i = tx_ring->next_to_clean, xsk_frames = 0; - unsigned int budget = q_vector->tx.work_limit; struct xdp_umem *umem = tx_ring->xsk_umem; union ixgbe_adv_tx_desc *tx_desc; struct ixgbe_tx_buffer *tx_bi; - bool xmit_done; + u32 xsk_frames = 0; - tx_bi = &tx_ring->tx_buffer_info[i]; - tx_desc = IXGBE_TX_DESC(tx_ring, i); - i -= tx_ring->count; + tx_bi = &tx_ring->tx_buffer_info[ntc]; + tx_desc = IXGBE_TX_DESC(tx_ring, ntc); - do { + while (ntc != ntu) { if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) break; @@ -661,22 +659,18 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, tx_bi++; tx_desc++; - i++; - if (unlikely(!i)) { - i -= tx_ring->count; + ntc++; + if (unlikely(ntc == tx_ring->count)) { + ntc = 0; tx_bi = tx_ring->tx_buffer_info; tx_desc = IXGBE_TX_DESC(tx_ring, 0); } /* issue prefetch for next Tx descriptor */ prefetch(tx_desc); + } - /* update budget accounting */ - budget--; - } while (likely(budget)); - - i += tx_ring->count; - tx_ring->next_to_clean = i; + tx_ring->next_to_clean = ntc; u64_stats_update_begin(&tx_ring->syncp); tx_ring->stats.bytes += total_bytes; @@ -688,8 +682,7 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, if (xsk_frames) xsk_umem_complete_tx(umem, xsk_frames); - xmit_done = ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit); - return budget > 0 && xmit_done; + return ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit); } int ixgbe_xsk_async_xmit(struct net_device *dev, u32 qid) From a89db445fbd7f1f8457b03759aa7343fa530ef6b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 8 Sep 2019 07:04:08 -0400 Subject: [PATCH 139/169] vhost: block speculation of translated descriptors iovec addresses coming from vhost are assumed to be pre-validated, but in fact can be speculated to a value out of range. Userspace address are later validated with array_index_nospec so we can be sure kernel info does not leak through these addresses, but vhost must also not leak userspace info outside the allowed memory table to guests. Following the defence in depth principle, make sure the address is not validated out of node range. Signed-off-by: Michael S. Tsirkin Cc: stable@vger.kernel.org Acked-by: Jason Wang Tested-by: Jason Wang --- drivers/vhost/vhost.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 5dc174ac8cac..34ea219936e3 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2071,8 +2071,10 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, _iov = iov + ret; size = node->size - addr + node->start; _iov->iov_len = min((u64)len - s, size); - _iov->iov_base = (void __user *)(unsigned long) - (node->userspace_addr + addr - node->start); + _iov->iov_base = (void __user *) + ((unsigned long)node->userspace_addr + + array_index_nospec((unsigned long)(addr - node->start), + node->size)); s += size; addr += size; ++ret; From 060423bfdee3f8bc6e2c1bac97de24d5415e2bc4 Mon Sep 17 00:00:00 2001 From: yongduan Date: Wed, 11 Sep 2019 17:44:24 +0800 Subject: [PATCH 140/169] vhost: make sure log_num < in_num The code assumes log_num < in_num everywhere, and that is true as long as in_num is incremented by descriptor iov count, and log_num by 1. However this breaks if there's a zero sized descriptor. As a result, if a malicious guest creates a vring desc with desc.len = 0, it may cause the host kernel to crash by overflowing the log array. This bug can be triggered during the VM migration. There's no need to log when desc.len = 0, so just don't increment log_num in this case. Fixes: 3a4d5c94e959 ("vhost_net: a kernel-level virtio server") Cc: stable@vger.kernel.org Reviewed-by: Lidong Chen Signed-off-by: ruippan Signed-off-by: yongduan Acked-by: Michael S. Tsirkin Reviewed-by: Tyler Hicks Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 34ea219936e3..acabf20b069e 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2180,7 +2180,7 @@ static int get_indirect(struct vhost_virtqueue *vq, /* If this is an input descriptor, increment that count. */ if (access == VHOST_ACCESS_WO) { *in_num += ret; - if (unlikely(log)) { + if (unlikely(log && ret)) { log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); log[*log_num].len = vhost32_to_cpu(vq, desc.len); ++*log_num; @@ -2321,7 +2321,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* If this is an input descriptor, * increment that count. */ *in_num += ret; - if (unlikely(log)) { + if (unlikely(log && ret)) { log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); log[*log_num].len = vhost32_to_cpu(vq, desc.len); ++*log_num; From af38d07ed391b21f7405fa1f936ca9686787d6d2 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 9 Sep 2019 16:56:02 -0400 Subject: [PATCH 141/169] tcp: fix tcp_ecn_withdraw_cwr() to clear TCP_ECN_QUEUE_CWR Fix tcp_ecn_withdraw_cwr() to clear the correct bit: TCP_ECN_QUEUE_CWR. Rationale: basically, TCP_ECN_DEMAND_CWR is a bit that is purely about the behavior of data receivers, and deciding whether to reflect incoming IP ECN CE marks as outgoing TCP th->ece marks. The TCP_ECN_QUEUE_CWR bit is purely about the behavior of data senders, and deciding whether to send CWR. The tcp_ecn_withdraw_cwr() function is only called from tcp_undo_cwnd_reduction() by data senders during an undo, so it should zero the sender-side state, TCP_ECN_QUEUE_CWR. It does not make sense to stop the reflection of incoming CE bits on incoming data packets just because outgoing packets were spuriously retransmitted. The bug has been reproduced with packetdrill to manifest in a scenario with RFC3168 ECN, with an incoming data packet with CE bit set and carrying a TCP timestamp value that causes cwnd undo. Before this fix, the IP CE bit was ignored and not reflected in the TCP ECE header bit, and sender sent a TCP CWR ('W') bit on the next outgoing data packet, even though the cwnd reduction had been undone. After this fix, the sender properly reflects the CE bit and does not set the W bit. Note: the bug actually predates 2005 git history; this Fixes footer is chosen to be the oldest SHA1 I have tested (from Sep 2007) for which the patch applies cleanly (since before this commit the code was in a .h file). Fixes: bdf1ee5d3bd3 ("[TCP]: Move code from tcp_ecn.h to tcp*.c and tcp.h & remove it") Signed-off-by: Neal Cardwell Acked-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Cc: Eric Dumazet Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c21e8a22fb3b..8a1cd93dbb09 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -266,7 +266,7 @@ static void tcp_ecn_accept_cwr(struct sock *sk, const struct sk_buff *skb) static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) { - tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; + tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; } static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) From c1432423a16825ef94e019808ed7330ee33ffc69 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 29 Aug 2019 16:47:38 +0930 Subject: [PATCH 142/169] pinctrl: aspeed: Fix spurious mux failures on the AST2500 Commit 674fa8daa8c9 ("pinctrl: aspeed-g5: Delay acquisition of regmaps") was determined to be a partial fix to the problem of acquiring the LPC Host Controller and GFX regmaps: The AST2500 pin controller may need to fetch syscon regmaps during expression evaluation as well as when setting mux state. For example, this case is hit by attempting to export pins exposing the LPC Host Controller as GPIOs. An optional eval() hook is added to the Aspeed pinmux operation struct and called from aspeed_sig_expr_eval() if the pointer is set by the SoC-specific driver. This enables the AST2500 to perform the custom action of acquiring its regmap dependencies as required. John Wang tested the fix on an Inspur FP5280G2 machine (AST2500-based) where the issue was found, and I've booted the fix on Witherspoon (AST2500) and Palmetto (AST2400) machines, and poked at relevant pins under QEMU by forcing mux configurations via devmem before exporting GPIOs to exercise the driver. Fixes: 7d29ed88acbb ("pinctrl: aspeed: Read and write bits in LPC and GFX controllers") Fixes: 674fa8daa8c9 ("pinctrl: aspeed-g5: Delay acquisition of regmaps") Reported-by: John Wang Tested-by: John Wang Signed-off-by: Andrew Jeffery Link: https://lore.kernel.org/r/20190829071738.2523-1-andrew@aj.id.au Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c | 30 +++++++++++++++++++++- drivers/pinctrl/aspeed/pinmux-aspeed.c | 7 +++-- drivers/pinctrl/aspeed/pinmux-aspeed.h | 7 ++--- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c index ba6438ac4d72..ff84d1afd229 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c @@ -2552,7 +2552,7 @@ static struct regmap *aspeed_g5_acquire_regmap(struct aspeed_pinmux_data *ctx, if (IS_ERR(map)) return map; } else - map = ERR_PTR(-ENODEV); + return ERR_PTR(-ENODEV); ctx->maps[ASPEED_IP_LPC] = map; dev_dbg(ctx->dev, "Acquired LPC regmap"); @@ -2562,6 +2562,33 @@ static struct regmap *aspeed_g5_acquire_regmap(struct aspeed_pinmux_data *ctx, return ERR_PTR(-EINVAL); } +static int aspeed_g5_sig_expr_eval(struct aspeed_pinmux_data *ctx, + const struct aspeed_sig_expr *expr, + bool enabled) +{ + int ret; + int i; + + for (i = 0; i < expr->ndescs; i++) { + const struct aspeed_sig_desc *desc = &expr->descs[i]; + struct regmap *map; + + map = aspeed_g5_acquire_regmap(ctx, desc->ip); + if (IS_ERR(map)) { + dev_err(ctx->dev, + "Failed to acquire regmap for IP block %d\n", + desc->ip); + return PTR_ERR(map); + } + + ret = aspeed_sig_desc_eval(desc, enabled, ctx->maps[desc->ip]); + if (ret <= 0) + return ret; + } + + return 1; +} + /** * Configure a pin's signal by applying an expression's descriptor state for * all descriptors in the expression. @@ -2647,6 +2674,7 @@ static int aspeed_g5_sig_expr_set(struct aspeed_pinmux_data *ctx, } static const struct aspeed_pinmux_ops aspeed_g5_ops = { + .eval = aspeed_g5_sig_expr_eval, .set = aspeed_g5_sig_expr_set, }; diff --git a/drivers/pinctrl/aspeed/pinmux-aspeed.c b/drivers/pinctrl/aspeed/pinmux-aspeed.c index 839c01b7953f..57305ca838a7 100644 --- a/drivers/pinctrl/aspeed/pinmux-aspeed.c +++ b/drivers/pinctrl/aspeed/pinmux-aspeed.c @@ -78,11 +78,14 @@ int aspeed_sig_desc_eval(const struct aspeed_sig_desc *desc, * neither the enabled nor disabled state. Thus we must explicitly test for * either condition as required. */ -int aspeed_sig_expr_eval(const struct aspeed_pinmux_data *ctx, +int aspeed_sig_expr_eval(struct aspeed_pinmux_data *ctx, const struct aspeed_sig_expr *expr, bool enabled) { - int i; int ret; + int i; + + if (ctx->ops->eval) + return ctx->ops->eval(ctx, expr, enabled); for (i = 0; i < expr->ndescs; i++) { const struct aspeed_sig_desc *desc = &expr->descs[i]; diff --git a/drivers/pinctrl/aspeed/pinmux-aspeed.h b/drivers/pinctrl/aspeed/pinmux-aspeed.h index 52d299b59ce2..db3457c86f48 100644 --- a/drivers/pinctrl/aspeed/pinmux-aspeed.h +++ b/drivers/pinctrl/aspeed/pinmux-aspeed.h @@ -702,6 +702,8 @@ struct aspeed_pin_function { struct aspeed_pinmux_data; struct aspeed_pinmux_ops { + int (*eval)(struct aspeed_pinmux_data *ctx, + const struct aspeed_sig_expr *expr, bool enabled); int (*set)(struct aspeed_pinmux_data *ctx, const struct aspeed_sig_expr *expr, bool enabled); }; @@ -722,9 +724,8 @@ struct aspeed_pinmux_data { int aspeed_sig_desc_eval(const struct aspeed_sig_desc *desc, bool enabled, struct regmap *map); -int aspeed_sig_expr_eval(const struct aspeed_pinmux_data *ctx, - const struct aspeed_sig_expr *expr, - bool enabled); +int aspeed_sig_expr_eval(struct aspeed_pinmux_data *ctx, + const struct aspeed_sig_expr *expr, bool enabled); static inline int aspeed_sig_expr_set(struct aspeed_pinmux_data *ctx, const struct aspeed_sig_expr *expr, From 77f22f92dff8e7b45c7786a430626d38071d4670 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 Sep 2019 18:56:57 +0800 Subject: [PATCH 143/169] tun: fix use-after-free when register netdev failed I got a UAF repport in tun driver when doing fuzzy test: [ 466.269490] ================================================================== [ 466.271792] BUG: KASAN: use-after-free in tun_chr_read_iter+0x2ca/0x2d0 [ 466.271806] Read of size 8 at addr ffff888372139250 by task tun-test/2699 [ 466.271810] [ 466.271824] CPU: 1 PID: 2699 Comm: tun-test Not tainted 5.3.0-rc1-00001-g5a9433db2614-dirty #427 [ 466.271833] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [ 466.271838] Call Trace: [ 466.271858] dump_stack+0xca/0x13e [ 466.271871] ? tun_chr_read_iter+0x2ca/0x2d0 [ 466.271890] print_address_description+0x79/0x440 [ 466.271906] ? vprintk_func+0x5e/0xf0 [ 466.271920] ? tun_chr_read_iter+0x2ca/0x2d0 [ 466.271935] __kasan_report+0x15c/0x1df [ 466.271958] ? tun_chr_read_iter+0x2ca/0x2d0 [ 466.271976] kasan_report+0xe/0x20 [ 466.271987] tun_chr_read_iter+0x2ca/0x2d0 [ 466.272013] do_iter_readv_writev+0x4b7/0x740 [ 466.272032] ? default_llseek+0x2d0/0x2d0 [ 466.272072] do_iter_read+0x1c5/0x5e0 [ 466.272110] vfs_readv+0x108/0x180 [ 466.299007] ? compat_rw_copy_check_uvector+0x440/0x440 [ 466.299020] ? fsnotify+0x888/0xd50 [ 466.299040] ? __fsnotify_parent+0xd0/0x350 [ 466.299064] ? fsnotify_first_mark+0x1e0/0x1e0 [ 466.304548] ? vfs_write+0x264/0x510 [ 466.304569] ? ksys_write+0x101/0x210 [ 466.304591] ? do_preadv+0x116/0x1a0 [ 466.304609] do_preadv+0x116/0x1a0 [ 466.309829] do_syscall_64+0xc8/0x600 [ 466.309849] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 466.309861] RIP: 0033:0x4560f9 [ 466.309875] Code: 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 [ 466.309889] RSP: 002b:00007ffffa5166e8 EFLAGS: 00000206 ORIG_RAX: 0000000000000127 [ 466.322992] RAX: ffffffffffffffda RBX: 0000000000400460 RCX: 00000000004560f9 [ 466.322999] RDX: 0000000000000003 RSI: 00000000200008c0 RDI: 0000000000000003 [ 466.323007] RBP: 00007ffffa516700 R08: 0000000000000004 R09: 0000000000000000 [ 466.323014] R10: 0000000000000000 R11: 0000000000000206 R12: 000000000040cb10 [ 466.323021] R13: 0000000000000000 R14: 00000000006d7018 R15: 0000000000000000 [ 466.323057] [ 466.323064] Allocated by task 2605: [ 466.335165] save_stack+0x19/0x80 [ 466.336240] __kasan_kmalloc.constprop.8+0xa0/0xd0 [ 466.337755] kmem_cache_alloc+0xe8/0x320 [ 466.339050] getname_flags+0xca/0x560 [ 466.340229] user_path_at_empty+0x2c/0x50 [ 466.341508] vfs_statx+0xe6/0x190 [ 466.342619] __do_sys_newstat+0x81/0x100 [ 466.343908] do_syscall_64+0xc8/0x600 [ 466.345303] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 466.347034] [ 466.347517] Freed by task 2605: [ 466.348471] save_stack+0x19/0x80 [ 466.349476] __kasan_slab_free+0x12e/0x180 [ 466.350726] kmem_cache_free+0xc8/0x430 [ 466.351874] putname+0xe2/0x120 [ 466.352921] filename_lookup+0x257/0x3e0 [ 466.354319] vfs_statx+0xe6/0x190 [ 466.355498] __do_sys_newstat+0x81/0x100 [ 466.356889] do_syscall_64+0xc8/0x600 [ 466.358037] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 466.359567] [ 466.360050] The buggy address belongs to the object at ffff888372139100 [ 466.360050] which belongs to the cache names_cache of size 4096 [ 466.363735] The buggy address is located 336 bytes inside of [ 466.363735] 4096-byte region [ffff888372139100, ffff88837213a100) [ 466.367179] The buggy address belongs to the page: [ 466.368604] page:ffffea000dc84e00 refcount:1 mapcount:0 mapping:ffff8883df1b4f00 index:0x0 compound_mapcount: 0 [ 466.371582] flags: 0x2fffff80010200(slab|head) [ 466.372910] raw: 002fffff80010200 dead000000000100 dead000000000122 ffff8883df1b4f00 [ 466.375209] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 466.377778] page dumped because: kasan: bad access detected [ 466.379730] [ 466.380288] Memory state around the buggy address: [ 466.381844] ffff888372139100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 466.384009] ffff888372139180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 466.386131] >ffff888372139200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 466.388257] ^ [ 466.390234] ffff888372139280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 466.392512] ffff888372139300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 466.394667] ================================================================== tun_chr_read_iter() accessed the memory which freed by free_netdev() called by tun_set_iff(): CPUA CPUB tun_set_iff() alloc_netdev_mqs() tun_attach() tun_chr_read_iter() tun_get() tun_do_read() tun_ring_recv() register_netdevice() <-- inject error goto err_detach tun_detach_all() <-- set RCV_SHUTDOWN free_netdev() <-- called from err_free_dev path netdev_freemem() <-- free the memory without check refcount (In this path, the refcount cannot prevent freeing the memory of dev, and the memory will be used by dev_put() called by tun_chr_read_iter() on CPUB.) (Break from tun_ring_recv(), because RCV_SHUTDOWN is set) tun_put() dev_put() <-- use the memory freed by netdev_freemem() Put the publishing of tfile->tun after register_netdevice(), so tun_get() won't get the tun pointer that freed by err_detach path if register_netdevice() failed. Fixes: eb0fb363f920 ("tuntap: attach queue 0 before registering netdevice") Reported-by: Hulk Robot Suggested-by: Jason Wang Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/net/tun.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index db16d7a13e00..aab0be40d443 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -787,7 +787,8 @@ static void tun_detach_all(struct net_device *dev) } static int tun_attach(struct tun_struct *tun, struct file *file, - bool skip_filter, bool napi, bool napi_frags) + bool skip_filter, bool napi, bool napi_frags, + bool publish_tun) { struct tun_file *tfile = file->private_data; struct net_device *dev = tun->dev; @@ -870,7 +871,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file, * initialized tfile; otherwise we risk using half-initialized * object. */ - rcu_assign_pointer(tfile->tun, tun); + if (publish_tun) + rcu_assign_pointer(tfile->tun, tun); rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); tun->numqueues++; tun_set_real_num_queues(tun); @@ -2730,7 +2732,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER, ifr->ifr_flags & IFF_NAPI, - ifr->ifr_flags & IFF_NAPI_FRAGS); + ifr->ifr_flags & IFF_NAPI_FRAGS, true); if (err < 0) return err; @@ -2829,13 +2831,17 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) INIT_LIST_HEAD(&tun->disabled); err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, - ifr->ifr_flags & IFF_NAPI_FRAGS); + ifr->ifr_flags & IFF_NAPI_FRAGS, false); if (err < 0) goto err_free_flow; err = register_netdevice(tun->dev); if (err < 0) goto err_detach; + /* free_netdev() won't check refcnt, to aovid race + * with dev_put() we need publish tun after registration. + */ + rcu_assign_pointer(tfile->tun, tun); } netif_carrier_on(tun->dev); @@ -2978,7 +2984,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) if (ret < 0) goto unlock; ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI, - tun->flags & IFF_NAPI_FRAGS); + tun->flags & IFF_NAPI_FRAGS, true); } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { tun = rtnl_dereference(tfile->tun); if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) From d23dbc479a8e813db4161a695d67da0e36557846 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 10 Sep 2019 13:29:59 +0200 Subject: [PATCH 144/169] ipv6: Fix the link time qualifier of 'ping_v6_proc_exit_net()' The '.exit' functions from 'pernet_operations' structure should be marked as __net_exit, not __net_init. Fixes: d862e5461423 ("net: ipv6: Implement /proc/net/icmp6.") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- net/ipv6/ping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 87d2d8c1db7c..98ac32b49d8c 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -223,7 +223,7 @@ static int __net_init ping_v6_proc_init_net(struct net *net) return 0; } -static void __net_init ping_v6_proc_exit_net(struct net *net) +static void __net_exit ping_v6_proc_exit_net(struct net *net) { remove_proc_entry("icmp6", net->proc_net); } From 10cc514f451a0f239aa34f91bc9dc954a9397840 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 10 Sep 2019 14:02:57 -0600 Subject: [PATCH 145/169] net: Fix null de-reference of device refcount In event of failure during register_netdevice, free_netdev is invoked immediately. free_netdev assumes that all the netdevice refcounts have been dropped prior to it being called and as a result frees and clears out the refcount pointer. However, this is not necessarily true as some of the operations in the NETDEV_UNREGISTER notifier handlers queue RCU callbacks for invocation after a grace period. The IPv4 callback in_dev_rcu_put tries to access the refcount after free_netdev is called which leads to a null de-reference- 44837.761523: <6> Unable to handle kernel paging request at virtual address 0000004a88287000 44837.761651: <2> pc : in_dev_finish_destroy+0x4c/0xc8 44837.761654: <2> lr : in_dev_finish_destroy+0x2c/0xc8 44837.762393: <2> Call trace: 44837.762398: <2> in_dev_finish_destroy+0x4c/0xc8 44837.762404: <2> in_dev_rcu_put+0x24/0x30 44837.762412: <2> rcu_nocb_kthread+0x43c/0x468 44837.762418: <2> kthread+0x118/0x128 44837.762424: <2> ret_from_fork+0x10/0x1c Fix this by waiting for the completion of the call_rcu() in case of register_netdevice errors. Fixes: 93ee31f14f6f ("[NET]: Fix free_netdev on register_netdev failure.") Cc: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 0891f499c1bb..5156c0edebe8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8758,6 +8758,8 @@ int register_netdevice(struct net_device *dev) ret = notifier_to_errno(ret); if (ret) { rollback_registered(dev); + rcu_barrier(); + dev->reg_state = NETREG_UNREGISTERED; } /* From a21b7f0cff1906a93a0130b74713b15a0b36481d Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Wed, 11 Sep 2019 10:09:02 -0500 Subject: [PATCH 146/169] net: qrtr: fix memort leak in qrtr_tun_write_iter In qrtr_tun_write_iter the allocated kbuf should be release in case of error or success return. v2 Update: Thanks to David Miller for pointing out the release on success path as well. Signed-off-by: Navid Emamdoost Signed-off-by: David S. Miller --- net/qrtr/tun.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c index ccff1e544c21..e35869e81766 100644 --- a/net/qrtr/tun.c +++ b/net/qrtr/tun.c @@ -84,11 +84,14 @@ static ssize_t qrtr_tun_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!kbuf) return -ENOMEM; - if (!copy_from_iter_full(kbuf, len, from)) + if (!copy_from_iter_full(kbuf, len, from)) { + kfree(kbuf); return -EFAULT; + } ret = qrtr_endpoint_post(&tun->ep, kbuf, len); + kfree(kbuf); return ret < 0 ? ret : len; } From 13a17cc0526f08d1df9507f7484176371cd263a0 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 11 Sep 2019 03:52:18 -0400 Subject: [PATCH 147/169] KVM: s390: kvm_s390_vm_start_migration: check dirty_bitmap before using it as target for memset() If userspace doesn't set KVM_MEM_LOG_DIRTY_PAGES on memslot before calling kvm_s390_vm_start_migration(), kernel will oops with: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 0000000000000000 TEID: 0000000000000483 Fault in home space mode while using kernel ASCE. AS:0000000002a2000b R2:00000001bff8c00b R3:00000001bff88007 S:00000001bff91000 P:000000000000003d Oops: 0004 ilc:2 [#1] SMP ... Call Trace: ([<001fffff804ec552>] kvm_s390_vm_set_attr+0x347a/0x3828 [kvm]) [<001fffff804ecfc0>] kvm_arch_vm_ioctl+0x6c0/0x1998 [kvm] [<001fffff804b67e4>] kvm_vm_ioctl+0x51c/0x11a8 [kvm] [<00000000008ba572>] do_vfs_ioctl+0x1d2/0xe58 [<00000000008bb284>] ksys_ioctl+0x8c/0xb8 [<00000000008bb2e2>] sys_ioctl+0x32/0x40 [<000000000175552c>] system_call+0x2b8/0x2d8 INFO: lockdep is turned off. Last Breaking-Event-Address: [<0000000000dbaf60>] __memset+0xc/0xa0 due to ms->dirty_bitmap being NULL, which might crash the host. Make sure that ms->dirty_bitmap is set before using it or return -EINVAL otherwise. Cc: Fixes: afdad61615cc ("KVM: s390: Fix storage attributes migration with memory slots") Signed-off-by: Igor Mammedov Link: https://lore.kernel.org/kvm/20190911075218.29153-1-imammedo@redhat.com/ Reviewed-by: David Hildenbrand Reviewed-by: Christian Borntraeger Reviewed-by: Claudio Imbrenda Reviewed-by: Cornelia Huck Reviewed-by: Janosch Frank Signed-off-by: Janosch Frank Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3f520cd837fb..b298fcc9ec56 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1018,6 +1018,8 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm) /* mark all the pages in active slots as dirty */ for (slotnr = 0; slotnr < slots->used_slots; slotnr++) { ms = slots->memslots + slotnr; + if (!ms->dirty_bitmap) + return -EINVAL; /* * The second half of the bitmap is only used on x86, * and would be wasted otherwise, so we put it to good From 410f954cb1d1c79ae485dd83a175f21954fd87cd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Sep 2019 15:26:49 +0100 Subject: [PATCH 148/169] Btrfs: fix assertion failure during fsync and use of stale transaction Sometimes when fsync'ing a file we need to log that other inodes exist and when we need to do that we acquire a reference on the inodes and then drop that reference using iput() after logging them. That generally is not a problem except if we end up doing the final iput() (dropping the last reference) on the inode and that inode has a link count of 0, which can happen in a very short time window if the logging path gets a reference on the inode while it's being unlinked. In that case we end up getting the eviction callback, btrfs_evict_inode(), invoked through the iput() call chain which needs to drop all of the inode's items from its subvolume btree, and in order to do that, it needs to join a transaction at the helper function evict_refill_and_join(). However because the task previously started a transaction at the fsync handler, btrfs_sync_file(), it has current->journal_info already pointing to a transaction handle and therefore evict_refill_and_join() will get that transaction handle from btrfs_join_transaction(). From this point on, two different problems can happen: 1) evict_refill_and_join() will often change the transaction handle's block reserve (->block_rsv) and set its ->bytes_reserved field to a value greater than 0. If evict_refill_and_join() never commits the transaction, the eviction handler ends up decreasing the reference count (->use_count) of the transaction handle through the call to btrfs_end_transaction(), and after that point we have a transaction handle with a NULL ->block_rsv (which is the value prior to the transaction join from evict_refill_and_join()) and a ->bytes_reserved value greater than 0. If after the eviction/iput completes the inode logging path hits an error or it decides that it must fallback to a transaction commit, the btrfs fsync handle, btrfs_sync_file(), gets a non-zero value from btrfs_log_dentry_safe(), and because of that non-zero value it tries to commit the transaction using a handle with a NULL ->block_rsv and a non-zero ->bytes_reserved value. This makes the transaction commit hit an assertion failure at btrfs_trans_release_metadata() because ->bytes_reserved is not zero but the ->block_rsv is NULL. The produced stack trace for that is like the following: [192922.917158] assertion failed: !trans->bytes_reserved, file: fs/btrfs/transaction.c, line: 816 [192922.917553] ------------[ cut here ]------------ [192922.917922] kernel BUG at fs/btrfs/ctree.h:3532! [192922.918310] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC PTI [192922.918666] CPU: 2 PID: 883 Comm: fsstress Tainted: G W 5.1.4-btrfs-next-47 #1 [192922.919035] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014 [192922.919801] RIP: 0010:assfail.constprop.25+0x18/0x1a [btrfs] (...) [192922.920925] RSP: 0018:ffffaebdc8a27da8 EFLAGS: 00010286 [192922.921315] RAX: 0000000000000051 RBX: ffff95c9c16a41c0 RCX: 0000000000000000 [192922.921692] RDX: 0000000000000000 RSI: ffff95cab6b16838 RDI: ffff95cab6b16838 [192922.922066] RBP: ffff95c9c16a41c0 R08: 0000000000000000 R09: 0000000000000000 [192922.922442] R10: ffffaebdc8a27e70 R11: 0000000000000000 R12: ffff95ca731a0980 [192922.922820] R13: 0000000000000000 R14: ffff95ca84c73338 R15: ffff95ca731a0ea8 [192922.923200] FS: 00007f337eda4e80(0000) GS:ffff95cab6b00000(0000) knlGS:0000000000000000 [192922.923579] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [192922.923948] CR2: 00007f337edad000 CR3: 00000001e00f6002 CR4: 00000000003606e0 [192922.924329] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [192922.924711] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [192922.925105] Call Trace: [192922.925505] btrfs_trans_release_metadata+0x10c/0x170 [btrfs] [192922.925911] btrfs_commit_transaction+0x3e/0xaf0 [btrfs] [192922.926324] btrfs_sync_file+0x44c/0x490 [btrfs] [192922.926731] do_fsync+0x38/0x60 [192922.927138] __x64_sys_fdatasync+0x13/0x20 [192922.927543] do_syscall_64+0x60/0x1c0 [192922.927939] entry_SYSCALL_64_after_hwframe+0x49/0xbe (...) [192922.934077] ---[ end trace f00808b12068168f ]--- 2) If evict_refill_and_join() decides to commit the transaction, it will be able to do it, since the nested transaction join only increments the transaction handle's ->use_count reference counter and it does not prevent the transaction from getting committed. This means that after eviction completes, the fsync logging path will be using a transaction handle that refers to an already committed transaction. What happens when using such a stale transaction can be unpredictable, we are at least having a use-after-free on the transaction handle itself, since the transaction commit will call kmem_cache_free() against the handle regardless of its ->use_count value, or we can end up silently losing all the updates to the log tree after that iput() in the logging path, or using a transaction handle that in the meanwhile was allocated to another task for a new transaction, etc, pretty much unpredictable what can happen. In order to fix both of them, instead of using iput() during logging, use btrfs_add_delayed_iput(), so that the logging path of fsync never drops the last reference on an inode, that step is offloaded to a safe context (usually the cleaner kthread). The assertion failure issue was sporadically triggered by the test case generic/475 from fstests, which loads the dm error target while fsstress is running, which lead to fsync failing while logging inodes with -EIO errors and then trying later to commit the transaction, triggering the assertion failure. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6c8297bcfeb7..1bfd7e34f31e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4985,7 +4985,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, BTRFS_I(inode), LOG_OTHER_INODE_ALL, 0, LLONG_MAX, ctx); - iput(inode); + btrfs_add_delayed_iput(inode); } } continue; @@ -5000,7 +5000,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, ret = btrfs_log_inode(trans, root, BTRFS_I(inode), LOG_OTHER_INODE, 0, LLONG_MAX, ctx); if (ret) { - iput(inode); + btrfs_add_delayed_iput(inode); continue; } @@ -5009,7 +5009,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) { - iput(inode); + btrfs_add_delayed_iput(inode); continue; } @@ -5056,7 +5056,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, } path->slots[0]++; } - iput(inode); + btrfs_add_delayed_iput(inode); } return ret; @@ -5689,7 +5689,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, } if (btrfs_inode_in_log(BTRFS_I(di_inode), trans->transid)) { - iput(di_inode); + btrfs_add_delayed_iput(di_inode); break; } @@ -5701,7 +5701,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, if (!ret && btrfs_must_commit_transaction(trans, BTRFS_I(di_inode))) ret = 1; - iput(di_inode); + btrfs_add_delayed_iput(di_inode); if (ret) goto next_dir_inode; if (ctx->log_new_dentries) { @@ -5848,7 +5848,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, if (!ret && ctx && ctx->log_new_dentries) ret = log_new_dir_dentries(trans, root, BTRFS_I(dir_inode), ctx); - iput(dir_inode); + btrfs_add_delayed_iput(dir_inode); if (ret) goto out; } @@ -5891,7 +5891,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, ret = btrfs_log_inode(trans, root, BTRFS_I(inode), LOG_INODE_EXISTS, 0, LLONG_MAX, ctx); - iput(inode); + btrfs_add_delayed_iput(inode); if (ret) return ret; From 18dfa7117a3f379862dcd3f67cadd678013bb9dd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 11 Sep 2019 17:42:00 +0100 Subject: [PATCH 149/169] Btrfs: fix unwritten extent buffers and hangs on future writeback attempts The lock_extent_buffer_io() returns 1 to the caller to tell it everything went fine and the callers needs to start writeback for the extent buffer (submit a bio, etc), 0 to tell the caller everything went fine but it does not need to start writeback for the extent buffer, and a negative value if some error happened. When it's about to return 1 it tries to lock all pages, and if a try lock on a page fails, and we didn't flush any existing bio in our "epd", it calls flush_write_bio(epd) and overwrites the return value of 1 to 0 or an error. The page might have been locked elsewhere, not with the goal of starting writeback of the extent buffer, and even by some code other than btrfs, like page migration for example, so it does not mean the writeback of the extent buffer was already started by some other task, so returning a 0 tells the caller (btree_write_cache_pages()) to not start writeback for the extent buffer. Note that epd might currently have either no bio, so flush_write_bio() returns 0 (success) or it might have a bio for another extent buffer with a lower index (logical address). Since we return 0 with the EXTENT_BUFFER_WRITEBACK bit set on the extent buffer and writeback is never started for the extent buffer, future attempts to writeback the extent buffer will hang forever waiting on that bit to be cleared, since it can only be cleared after writeback completes. Such hang is reported with a trace like the following: [49887.347053] INFO: task btrfs-transacti:1752 blocked for more than 122 seconds. [49887.347059] Not tainted 5.2.13-gentoo #2 [49887.347060] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [49887.347062] btrfs-transacti D 0 1752 2 0x80004000 [49887.347064] Call Trace: [49887.347069] ? __schedule+0x265/0x830 [49887.347071] ? bit_wait+0x50/0x50 [49887.347072] ? bit_wait+0x50/0x50 [49887.347074] schedule+0x24/0x90 [49887.347075] io_schedule+0x3c/0x60 [49887.347077] bit_wait_io+0x8/0x50 [49887.347079] __wait_on_bit+0x6c/0x80 [49887.347081] ? __lock_release.isra.29+0x155/0x2d0 [49887.347083] out_of_line_wait_on_bit+0x7b/0x80 [49887.347084] ? var_wake_function+0x20/0x20 [49887.347087] lock_extent_buffer_for_io+0x28c/0x390 [49887.347089] btree_write_cache_pages+0x18e/0x340 [49887.347091] do_writepages+0x29/0xb0 [49887.347093] ? kmem_cache_free+0x132/0x160 [49887.347095] ? convert_extent_bit+0x544/0x680 [49887.347097] filemap_fdatawrite_range+0x70/0x90 [49887.347099] btrfs_write_marked_extents+0x53/0x120 [49887.347100] btrfs_write_and_wait_transaction.isra.4+0x38/0xa0 [49887.347102] btrfs_commit_transaction+0x6bb/0x990 [49887.347103] ? start_transaction+0x33e/0x500 [49887.347105] transaction_kthread+0x139/0x15c So fix this by not overwriting the return value (ret) with the result from flush_write_bio(). We also need to clear the EXTENT_BUFFER_WRITEBACK bit in case flush_write_bio() returns an error, otherwise it will hang any future attempts to writeback the extent buffer, and undo all work done before (set back EXTENT_BUFFER_DIRTY, etc). This is a regression introduced in the 5.2 kernel. Fixes: 2e3c25136adfb ("btrfs: extent_io: add proper error handling to lock_extent_buffer_for_io()") Fixes: f4340622e0226 ("btrfs: extent_io: Move the BUG_ON() in flush_write_bio() one level up") Reported-by: Zdenek Sojka Link: https://lore.kernel.org/linux-btrfs/GpO.2yos.3WGDOLpx6t%7D.1TUDYM@seznam.cz/T/#u Reported-by: Stefan Priebe - Profihost AG Link: https://lore.kernel.org/linux-btrfs/5c4688ac-10a7-fb07-70e8-c5d31a3fbb38@profihost.ag/T/#t Reported-by: Drazen Kacar Link: https://lore.kernel.org/linux-btrfs/DB8PR03MB562876ECE2319B3E579590F799C80@DB8PR03MB5628.eurprd03.prod.outlook.com/ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204377 Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index aea990473392..9d1f42200af5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3628,6 +3628,13 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb) TASK_UNINTERRUPTIBLE); } +static void end_extent_buffer_writeback(struct extent_buffer *eb) +{ + clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); + smp_mb__after_atomic(); + wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); +} + /* * Lock eb pages and flush the bio if we can't the locks * @@ -3699,8 +3706,11 @@ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb if (!trylock_page(p)) { if (!flush) { - ret = flush_write_bio(epd); - if (ret < 0) { + int err; + + err = flush_write_bio(epd); + if (err < 0) { + ret = err; failed_page_nr = i; goto err_unlock; } @@ -3715,16 +3725,23 @@ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb /* Unlock already locked pages */ for (i = 0; i < failed_page_nr; i++) unlock_page(eb->pages[i]); + /* + * Clear EXTENT_BUFFER_WRITEBACK and wake up anyone waiting on it. + * Also set back EXTENT_BUFFER_DIRTY so future attempts to this eb can + * be made and undo everything done before. + */ + btrfs_tree_lock(eb); + spin_lock(&eb->refs_lock); + set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); + end_extent_buffer_writeback(eb); + spin_unlock(&eb->refs_lock); + percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, eb->len, + fs_info->dirty_metadata_batch); + btrfs_clear_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + btrfs_tree_unlock(eb); return ret; } -static void end_extent_buffer_writeback(struct extent_buffer *eb) -{ - clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); - smp_mb__after_atomic(); - wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); -} - static void set_btree_ioerr(struct page *page) { struct extent_buffer *eb = (struct extent_buffer *)page->private; From f39b683d35dfa93a58f1b400a8ec0ff81296b37c Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 12 Sep 2019 13:01:44 +0200 Subject: [PATCH 150/169] ixgbe: Fix secpath usage for IPsec TX offload. The ixgbe driver currently does IPsec TX offloading based on an existing secpath. However, the secpath can also come from the RX side, in this case it is misinterpreted for TX offload and the packets are dropped with a "bad sa_idx" error. Fix this by using the xfrm_offload() function to test for TX offload. Fixes: 592594704761 ("ixgbe: process the Tx ipsec offload") Reported-by: Michael Marley Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 77ca9005dc41..51c696b6bf64 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ixgbe.h" #include "ixgbe_common.h" @@ -8697,7 +8698,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, #endif /* IXGBE_FCOE */ #ifdef CONFIG_IXGBE_IPSEC - if (secpath_exists(skb) && + if (xfrm_offload(skb) && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx)) goto out_drop; #endif From b456d72412ca8797234449c25815e82f4e1426c0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 11 Sep 2019 18:02:39 +0200 Subject: [PATCH 151/169] sctp: Fix the link time qualifier of 'sctp_ctrlsock_exit()' The '.exit' functions from 'pernet_operations' structure should be marked as __net_exit, not __net_init. Fixes: 8e2d61e0aed2 ("sctp: fix race on protocol/netns initialization") Signed-off-by: Christophe JAILLET Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 2d47adcb4cbe..53746ffeeca3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1336,7 +1336,7 @@ static int __net_init sctp_ctrlsock_init(struct net *net) return status; } -static void __net_init sctp_ctrlsock_exit(struct net *net) +static void __net_exit sctp_ctrlsock_exit(struct net *net) { /* Free the control endpoint. */ inet_ctl_sock_destroy(net->sctp.ctl_sock); From 53936b5bf35e140ae27e4bbf0447a61063f400da Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 12 Sep 2019 13:54:38 +0200 Subject: [PATCH 152/169] KVM: s390: Do not leak kernel stack data in the KVM_S390_INTERRUPT ioctl When the userspace program runs the KVM_S390_INTERRUPT ioctl to inject an interrupt, we convert them from the legacy struct kvm_s390_interrupt to the new struct kvm_s390_irq via the s390int_to_s390irq() function. However, this function does not take care of all types of interrupts that we can inject into the guest later (see do_inject_vcpu()). Since we do not clear out the s390irq values before calling s390int_to_s390irq(), there is a chance that we copy random data from the kernel stack which could be leaked to the userspace later. Specifically, the problem exists with the KVM_S390_INT_PFAULT_INIT interrupt: s390int_to_s390irq() does not handle it, and the function __inject_pfault_init() later copies irq->u.ext which contains the random kernel stack data. This data can then be leaked either to the guest memory in __deliver_pfault_init(), or the userspace might retrieve it directly with the KVM_S390_GET_IRQ_STATE ioctl. Fix it by handling that interrupt type in s390int_to_s390irq(), too, and by making sure that the s390irq struct is properly pre-initialized. And while we're at it, make sure that s390int_to_s390irq() now directly returns -EINVAL for unknown interrupt types, so that we immediately get a proper error code in case we add more interrupt types to do_inject_vcpu() without updating s390int_to_s390irq() sometime in the future. Cc: stable@vger.kernel.org Reviewed-by: David Hildenbrand Reviewed-by: Christian Borntraeger Reviewed-by: Janosch Frank Signed-off-by: Thomas Huth Link: https://lore.kernel.org/kvm/20190912115438.25761-1-thuth@redhat.com Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 10 ++++++++++ arch/s390/kvm/kvm-s390.c | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index b5fd6e85657c..d1ccc168c071 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1961,6 +1961,16 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, case KVM_S390_MCHK: irq->u.mchk.mcic = s390int->parm64; break; + case KVM_S390_INT_PFAULT_INIT: + irq->u.ext.ext_params = s390int->parm; + irq->u.ext.ext_params2 = s390int->parm64; + break; + case KVM_S390_RESTART: + case KVM_S390_INT_CLOCK_COMP: + case KVM_S390_INT_CPU_TIMER: + break; + default: + return -EINVAL; } return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b298fcc9ec56..0d697b436f2b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4335,7 +4335,7 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp, } case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int; - struct kvm_s390_irq s390irq; + struct kvm_s390_irq s390irq = {}; if (copy_from_user(&s390int, argp, sizeof(s390int))) return -EFAULT; From a0eb9abd8af92d1aa34bc1e24dfbd1ba0bd6a56c Mon Sep 17 00:00:00 2001 From: Eugene Syromiatnikov Date: Wed, 11 Sep 2019 18:45:40 +0100 Subject: [PATCH 153/169] fork: block invalid exit signals with clone3() Previously, higher 32 bits of exit_signal fields were lost when copied to the kernel args structure (that uses int as a type for the respective field). Moreover, as Oleg has noted, exit_signal is used unchecked, so it has to be checked for sanity before use; for the legacy syscalls, applying CSIGNAL mask guarantees that it is at least non-negative; however, there's no such thing is done in clone3() code path, and that can break at least thread_group_leader. This commit adds a check to copy_clone_args_from_user() to verify that the exit signal is limited by CSIGNAL as with legacy clone() and that the signal is valid. With this we don't get the legacy clone behavior were an invalid signal could be handed down and would only be detected and ignored in do_notify_parent(). Users of clone3() will now get a proper error when they pass an invalid exit signal. Note, that this is not user-visible behavior since no kernel with clone3() has been released yet. The following program will cause a splat on a non-fixed clone3() version and will fail correctly on a fixed version: #define _GNU_SOURCE #include #include #include #include #include #include #include #include int main(int argc, char *argv[]) { pid_t pid = -1; struct clone_args args = {0}; args.exit_signal = -1; pid = syscall(__NR_clone3, &args, sizeof(struct clone_args)); if (pid < 0) exit(EXIT_FAILURE); if (pid == 0) exit(EXIT_SUCCESS); wait(NULL); exit(EXIT_SUCCESS); } Fixes: 7f192e3cd316 ("fork: add clone3") Reported-by: Oleg Nesterov Suggested-by: Oleg Nesterov Suggested-by: Dmitry V. Levin Signed-off-by: Eugene Syromiatnikov Link: https://lore.kernel.org/r/4b38fa4ce420b119a4c6345f42fe3cec2de9b0b5.1568223594.git.esyr@redhat.com [christian.brauner@ubuntu.com: simplify check and rework commit message] Signed-off-by: Christian Brauner --- kernel/fork.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/fork.c b/kernel/fork.c index 2852d0e76ea3..541fd805fb88 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2338,6 +2338,8 @@ struct mm_struct *copy_init_mm(void) * * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. + * + * args->exit_signal is expected to be checked for sanity by the caller. */ long _do_fork(struct kernel_clone_args *args) { @@ -2562,6 +2564,14 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, if (copy_from_user(&args, uargs, size)) return -EFAULT; + /* + * Verify that higher 32bits of exit_signal are unset and that + * it is a valid signal + */ + if (unlikely((args.exit_signal & ~((u64)CSIGNAL)) || + !valid_signal(args.exit_signal))) + return -EINVAL; + *kargs = (struct kernel_clone_args){ .flags = args.flags, .pidfd = u64_to_user_ptr(args.pidfd), From 505a8ec7e11ae5236c4a154a1e24ef49a8349600 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Sep 2019 13:56:34 +0100 Subject: [PATCH 154/169] Revert "drm/i915/userptr: Acquire the page lock around set_page_dirty()" The userptr put_pages can be called from inside try_to_unmap, and so enters with the page lock held on one of the object's backing pages. We cannot take the page lock ourselves for fear of recursion. Reported-by: Lionel Landwerlin Reported-by: Martin Wilck Reported-by: Leo Kraav Fixes: aa56a292ce62 ("drm/i915/userptr: Acquire the page lock around set_page_dirty()") References: https://bugzilla.kernel.org/show_bug.cgi?id=203317 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Jani Nikula Cc: Joonas Lahtinen Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 2caa594322bc..528b61678334 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -664,15 +664,7 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, for_each_sgt_page(page, sgt_iter, pages) { if (obj->mm.dirty) - /* - * As this may not be anonymous memory (e.g. shmem) - * but exist on a real mapping, we have to lock - * the page in order to dirty it -- holding - * the page reference is not sufficient to - * prevent the inode from being truncated. - * Play safe and take the lock. - */ - set_page_dirty_lock(page); + set_page_dirty(page); mark_page_accessed(page); put_page(page); From 44e9d308a51fbf6d2e5a913f7452a5d5f1902249 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 12 Sep 2019 10:56:44 -0700 Subject: [PATCH 155/169] kselftests: cgroup: add freezer mkdir test Add a new cgroup freezer selftest, which checks that if a cgroup is frozen, their new child cgroups will properly inherit the frozen state. It creates a parent cgroup, freezes it, creates a child cgroup and populates it with a dummy process. Then it checks that both parent and child cgroup are frozen. Signed-off-by: Roman Gushchin Cc: Tejun Heo Cc: Shuah Khan Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_freezer.c | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c index 8219a30853d2..0fc1b6d4b0f9 100644 --- a/tools/testing/selftests/cgroup/test_freezer.c +++ b/tools/testing/selftests/cgroup/test_freezer.c @@ -447,6 +447,59 @@ static int test_cgfreezer_forkbomb(const char *root) return ret; } +/* + * The test creates a cgroups and freezes it. Then it creates a child cgroup + * and populates it with a task. After that it checks that the child cgroup + * is frozen and the parent cgroup remains frozen too. + */ +static int test_cgfreezer_mkdir(const char *root) +{ + int ret = KSFT_FAIL; + char *parent, *child = NULL; + int pid; + + parent = cg_name(root, "cg_test_mkdir_A"); + if (!parent) + goto cleanup; + + child = cg_name(parent, "cg_test_mkdir_B"); + if (!child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_freeze_wait(parent, true)) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + + pid = cg_run_nowait(child, child_fn, NULL); + if (pid < 0) + goto cleanup; + + if (cg_wait_for_proc_count(child, 1)) + goto cleanup; + + if (cg_check_frozen(child, true)) + goto cleanup; + + if (cg_check_frozen(parent, true)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (child) + cg_destroy(child); + free(child); + if (parent) + cg_destroy(parent); + free(parent); + return ret; +} + /* * The test creates two nested cgroups, freezes the parent * and removes the child. Then it checks that the parent cgroup @@ -815,6 +868,7 @@ struct cgfreezer_test { T(test_cgfreezer_simple), T(test_cgfreezer_tree), T(test_cgfreezer_forkbomb), + T(test_cgfreezer_mkdir), T(test_cgfreezer_rmdir), T(test_cgfreezer_migrate), T(test_cgfreezer_ptrace), From 97a61369830ab085df5aed0ff9256f35b07d425a Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 12 Sep 2019 10:56:45 -0700 Subject: [PATCH 156/169] cgroup: freezer: fix frozen state inheritance If a new child cgroup is created in the frozen cgroup hierarchy (one or more of ancestor cgroups is frozen), the CGRP_FREEZE cgroup flag should be set. Otherwise if a process will be attached to the child cgroup, it won't become frozen. The problem can be reproduced with the test_cgfreezer_mkdir test. This is the output before this patch: ~/test_freezer ok 1 test_cgfreezer_simple ok 2 test_cgfreezer_tree ok 3 test_cgfreezer_forkbomb Cgroup /sys/fs/cgroup/cg_test_mkdir_A/cg_test_mkdir_B isn't frozen not ok 4 test_cgfreezer_mkdir ok 5 test_cgfreezer_rmdir ok 6 test_cgfreezer_migrate ok 7 test_cgfreezer_ptrace ok 8 test_cgfreezer_stopped ok 9 test_cgfreezer_ptraced ok 10 test_cgfreezer_vfork And with this patch: ~/test_freezer ok 1 test_cgfreezer_simple ok 2 test_cgfreezer_tree ok 3 test_cgfreezer_forkbomb ok 4 test_cgfreezer_mkdir ok 5 test_cgfreezer_rmdir ok 6 test_cgfreezer_migrate ok 7 test_cgfreezer_ptrace ok 8 test_cgfreezer_stopped ok 9 test_cgfreezer_ptraced ok 10 test_cgfreezer_vfork Reported-by: Mark Crossen Signed-off-by: Roman Gushchin Fixes: 76f969e8948d ("cgroup: cgroup v2 freezer") Cc: Tejun Heo Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 753afbca549f..8be1da1ebd9a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5255,8 +5255,16 @@ static struct cgroup *cgroup_create(struct cgroup *parent) * if the parent has to be frozen, the child has too. */ cgrp->freezer.e_freeze = parent->freezer.e_freeze; - if (cgrp->freezer.e_freeze) + if (cgrp->freezer.e_freeze) { + /* + * Set the CGRP_FREEZE flag, so when a process will be + * attached to the child cgroup, it will become frozen. + * At this point the new cgroup is unpopulated, so we can + * consider it frozen immediately. + */ + set_bit(CGRP_FREEZE, &cgrp->flags); set_bit(CGRP_FROZEN, &cgrp->flags); + } spin_lock_irq(&css_set_lock); for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { From 8861474a105c91e812d38cd65d3be795c7f32c4f Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Sep 2019 10:03:15 +0200 Subject: [PATCH 157/169] Revert "mmc: tmio: move runtime PM enablement to the driver implementations" This reverts commit 7ff213193310ef8d0ee5f04f79d791210787ac2c. It turns out that the above commit introduces other problems. For example, calling pm_runtime_set_active() must not be done prior calling pm_runtime_enable() as that makes it fail. This leads to additional problems, such as clock enables being wrongly balanced. Rather than fixing the problem on top, let's start over by doing a revert. Fixes: 7ff213193310 ("mmc: tmio: move runtime PM enablement to the driver implementations") Signed-off-by: Ulf Hansson Tested-by: Geert Uytterhoeven --- drivers/mmc/host/renesas_sdhi_core.c | 6 ------ drivers/mmc/host/tmio_mmc.c | 5 ----- drivers/mmc/host/tmio_mmc_core.c | 11 ++--------- drivers/mmc/host/uniphier-sd.c | 3 --- 4 files changed, 2 insertions(+), 23 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 64d3b5fb7fe5..4a2872f49a60 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -774,8 +774,6 @@ int renesas_sdhi_probe(struct platform_device *pdev, /* All SDHI have SDIO status bits which must be 1 */ mmc_data->flags |= TMIO_MMC_SDIO_STATUS_SETBITS; - pm_runtime_enable(&pdev->dev); - ret = renesas_sdhi_clk_enable(host); if (ret) goto efree; @@ -856,8 +854,6 @@ int renesas_sdhi_probe(struct platform_device *pdev, efree: tmio_mmc_host_free(host); - pm_runtime_disable(&pdev->dev); - return ret; } EXPORT_SYMBOL_GPL(renesas_sdhi_probe); @@ -869,8 +865,6 @@ int renesas_sdhi_remove(struct platform_device *pdev) tmio_mmc_host_remove(host); renesas_sdhi_clk_disable(host); - pm_runtime_disable(&pdev->dev); - return 0; } EXPORT_SYMBOL_GPL(renesas_sdhi_remove); diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index 8539e10784b4..93e83ad25976 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -172,8 +172,6 @@ static int tmio_mmc_probe(struct platform_device *pdev) host->mmc->f_max = pdata->hclk; host->mmc->f_min = pdata->hclk / 512; - pm_runtime_enable(&pdev->dev); - ret = tmio_mmc_host_probe(host); if (ret) goto host_free; @@ -193,7 +191,6 @@ static int tmio_mmc_probe(struct platform_device *pdev) tmio_mmc_host_remove(host); host_free: tmio_mmc_host_free(host); - pm_runtime_disable(&pdev->dev); cell_disable: if (cell->disable) cell->disable(pdev); @@ -210,8 +207,6 @@ static int tmio_mmc_remove(struct platform_device *pdev) if (cell->disable) cell->disable(pdev); - pm_runtime_disable(&pdev->dev); - return 0; } diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 2cb3f951c3e2..8b299c1f0069 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1153,15 +1153,6 @@ void tmio_mmc_host_free(struct tmio_mmc_host *host) } EXPORT_SYMBOL_GPL(tmio_mmc_host_free); -/** - * tmio_mmc_host_probe() - Common probe for all implementations - * @_host: Host to probe - * - * Perform tasks common to all implementations probe functions. - * - * The caller should have called pm_runtime_enable() prior to calling - * the common probe function. - */ int tmio_mmc_host_probe(struct tmio_mmc_host *_host) { struct platform_device *pdev = _host->pdev; @@ -1260,6 +1251,7 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host) pm_runtime_set_active(&pdev->dev); pm_runtime_set_autosuspend_delay(&pdev->dev, 50); pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_enable(&pdev->dev); ret = mmc_add_host(mmc); if (ret) @@ -1295,6 +1287,7 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host) pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); } EXPORT_SYMBOL_GPL(tmio_mmc_host_remove); diff --git a/drivers/mmc/host/uniphier-sd.c b/drivers/mmc/host/uniphier-sd.c index 49aad9a79c18..91a2be41edf6 100644 --- a/drivers/mmc/host/uniphier-sd.c +++ b/drivers/mmc/host/uniphier-sd.c @@ -631,7 +631,6 @@ static int uniphier_sd_probe(struct platform_device *pdev) host->clk_disable = uniphier_sd_clk_disable; host->set_clock = uniphier_sd_set_clock; - pm_runtime_enable(&pdev->dev); ret = uniphier_sd_clk_enable(host); if (ret) goto free_host; @@ -653,7 +652,6 @@ static int uniphier_sd_probe(struct platform_device *pdev) free_host: tmio_mmc_host_free(host); - pm_runtime_disable(&pdev->dev); return ret; } @@ -664,7 +662,6 @@ static int uniphier_sd_remove(struct platform_device *pdev) tmio_mmc_host_remove(host); uniphier_sd_clk_disable(host); - pm_runtime_disable(&pdev->dev); return 0; } From aa86f1a3887523d78bfadd1c4e4df8f919336511 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Sep 2019 11:19:26 +0200 Subject: [PATCH 158/169] mmc: tmio: Fixup runtime PM management during probe The tmio_mmc_host_probe() calls pm_runtime_set_active() to update the runtime PM status of the device, as to make it reflect the current status of the HW. This works fine for most cases, but unfortunate not for all. Especially, there is a generic problem when the device has a genpd attached and that genpd have the ->start|stop() callbacks assigned. More precisely, if the driver calls pm_runtime_set_active() during ->probe(), genpd does not get to invoke the ->start() callback for it, which means the HW isn't really fully powered on. Furthermore, in the next phase, when the device becomes runtime suspended, genpd will invoke the ->stop() callback for it, potentially leading to usage count imbalance problems, depending on what's implemented behind the callbacks of course. To fix this problem, convert to call pm_runtime_get_sync() from tmio_mmc_host_probe() rather than pm_runtime_set_active(). Additionally, to avoid bumping usage counters and unnecessary re-initializing the HW the first time the tmio driver's ->runtime_resume() callback is called, introduce a state flag to keeping track of this. Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Tested-by: Geert Uytterhoeven --- drivers/mmc/host/tmio_mmc.h | 1 + drivers/mmc/host/tmio_mmc_core.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index c5ba13fae399..2f0b092d6dcc 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -163,6 +163,7 @@ struct tmio_mmc_host { unsigned long last_req_ts; struct mutex ios_lock; /* protect set_ios() context */ bool native_hotplug; + bool runtime_synced; bool sdio_irq_enabled; /* Mandatory callback */ diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 8b299c1f0069..32f9679ec42e 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1248,20 +1248,22 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host) /* See if we also get DMA */ tmio_mmc_request_dma(_host, pdata); - pm_runtime_set_active(&pdev->dev); pm_runtime_set_autosuspend_delay(&pdev->dev, 50); pm_runtime_use_autosuspend(&pdev->dev); pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); ret = mmc_add_host(mmc); if (ret) goto remove_host; dev_pm_qos_expose_latency_limit(&pdev->dev, 100); + pm_runtime_put(&pdev->dev); return 0; remove_host: + pm_runtime_put_noidle(&pdev->dev); tmio_mmc_host_remove(_host); return ret; } @@ -1330,6 +1332,11 @@ int tmio_mmc_host_runtime_resume(struct device *dev) { struct tmio_mmc_host *host = dev_get_drvdata(dev); + if (!host->runtime_synced) { + host->runtime_synced = true; + return 0; + } + tmio_mmc_clk_enable(host); tmio_mmc_hw_reset(host->mmc); From 87b5d602a1cc76169b8d81ec2c74c8d95d9350dc Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 13 Sep 2019 11:20:22 +0200 Subject: [PATCH 159/169] mmc: tmio: Fixup runtime PM management during remove Accessing the device when it may be runtime suspended is a bug, which is the case in tmio_mmc_host_remove(). Let's fix the behaviour. Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Tested-by: Geert Uytterhoeven --- drivers/mmc/host/tmio_mmc_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 32f9679ec42e..9b6e1001e77c 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -1274,12 +1274,11 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host) struct platform_device *pdev = host->pdev; struct mmc_host *mmc = host->mmc; + pm_runtime_get_sync(&pdev->dev); + if (host->pdata->flags & TMIO_MMC_SDIO_IRQ) sd_ctrl_write16(host, CTL_TRANSACTION_CTL, 0x0000); - if (!host->native_hotplug) - pm_runtime_get_sync(&pdev->dev); - dev_pm_qos_hide_latency_limit(&pdev->dev); mmc_remove_host(mmc); @@ -1288,6 +1287,8 @@ void tmio_mmc_host_remove(struct tmio_mmc_host *host) tmio_mmc_release_dma(host); pm_runtime_dont_use_autosuspend(&pdev->dev); + if (host->native_hotplug) + pm_runtime_put_noidle(&pdev->dev); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); } From 8f6617badcc96a582678ea36ea96490c5ff26eb4 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Thu, 12 Sep 2019 12:07:34 -0700 Subject: [PATCH 160/169] ixgbevf: Fix secpath usage for IPsec Tx offload Port the same fix for ixgbe to ixgbevf. The ixgbevf driver currently does IPsec Tx offloading based on an existing secpath. However, the secpath can also come from the Rx side, in this case it is misinterpreted for Tx offload and the packets are dropped with a "bad sa_idx" error. Fix this by using the xfrm_offload() function to test for Tx offload. CC: Shannon Nelson Fixes: 7f68d4306701 ("ixgbevf: enable VF IPsec offload operations") Reported-by: Jonathan Tooker Signed-off-by: Jeff Kirsher Acked-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d2b41f9f87f8..72872d6ca80c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "ixgbevf.h" @@ -4161,7 +4162,7 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb, first->protocol = vlan_get_protocol(skb); #ifdef CONFIG_IXGBEVF_IPSEC - if (secpath_exists(skb) && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx)) + if (xfrm_offload(skb) && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx)) goto out_drop; #endif tso = ixgbevf_tso(tx_ring, first, &hdr_len, &ipsec_tx); From 8e2ef6abd4653b1cfeefe262017ecdd5eaf40fef Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Thu, 12 Sep 2019 12:02:17 +0800 Subject: [PATCH 161/169] sctp: change return type of sctp_get_port_local Currently sctp_get_port_local() returns a long which is either 0,1 or a pointer casted to long. It's neither of the callers use the return value since commit 62208f12451f ("net: sctp: simplify sctp_get_port"). Now two callers are sctp_get_port and sctp_do_bind, they actually assumend a casted to an int was the same as a pointer casted to a long, and they don't save the return value just check whether it is zero or non-zero, so it would better change return type from long to int for sctp_get_port_local. Signed-off-by: Mao Wenan Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ad8751891b00..1e26606683a8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -309,7 +309,7 @@ static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) return retval; } -static long sctp_get_port_local(struct sock *, union sctp_addr *); +static int sctp_get_port_local(struct sock *, union sctp_addr *); /* Verify this is a valid sockaddr. */ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, @@ -7999,7 +7999,7 @@ static void sctp_unhash(struct sock *sk) static struct sctp_bind_bucket *sctp_bucket_create( struct sctp_bind_hashbucket *head, struct net *, unsigned short snum); -static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) +static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { struct sctp_sock *sp = sctp_sk(sk); bool reuse = (sk->sk_reuse || sp->reuse); @@ -8109,7 +8109,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) if (sctp_bind_addr_conflict(&ep2->base.bind_addr, addr, sp2, sp)) { - ret = (long)sk2; + ret = 1; goto fail_unlock; } } @@ -8181,7 +8181,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum) addr.v4.sin_port = htons(snum); /* Note: sk->sk_num gets filled in if ephemeral port request. */ - return !!sctp_get_port_local(sk, &addr); + return sctp_get_port_local(sk, &addr); } /* From e0e4b8de10ad3affe83cc91a1cf6d1e48cca333e Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Thu, 12 Sep 2019 12:02:18 +0800 Subject: [PATCH 162/169] sctp: remove redundant assignment when call sctp_get_port_local There are more parentheses in if clause when call sctp_get_port_local in sctp_do_bind, and redundant assignment to 'ret'. This patch is to do cleanup. Signed-off-by: Mao Wenan Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1e26606683a8..5fe26ca6edbd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -399,9 +399,8 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) * detection. */ addr->v4.sin_port = htons(snum); - if ((ret = sctp_get_port_local(sk, addr))) { + if (sctp_get_port_local(sk, addr)) return -EADDRINUSE; - } /* Refresh ephemeral port. */ if (!bp->port) From 29b99f54a8e63f871cf4060406d09fd8b0d744d8 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Thu, 12 Sep 2019 12:02:19 +0800 Subject: [PATCH 163/169] sctp: destroy bucket if failed to bind addr There is one memory leak bug report: BUG: memory leak unreferenced object 0xffff8881dc4c5ec0 (size 40): comm "syz-executor.0", pid 5673, jiffies 4298198457 (age 27.578s) hex dump (first 32 bytes): 02 00 00 00 81 88 ff ff 00 00 00 00 00 00 00 00 ................ f8 63 3d c1 81 88 ff ff 00 00 00 00 00 00 00 00 .c=............. backtrace: [<0000000072006339>] sctp_get_port_local+0x2a1/0xa00 [sctp] [<00000000c7b379ec>] sctp_do_bind+0x176/0x2c0 [sctp] [<000000005be274a2>] sctp_bind+0x5a/0x80 [sctp] [<00000000b66b4044>] inet6_bind+0x59/0xd0 [ipv6] [<00000000c68c7f42>] __sys_bind+0x120/0x1f0 net/socket.c:1647 [<000000004513635b>] __do_sys_bind net/socket.c:1658 [inline] [<000000004513635b>] __se_sys_bind net/socket.c:1656 [inline] [<000000004513635b>] __x64_sys_bind+0x3e/0x50 net/socket.c:1656 [<0000000061f2501e>] do_syscall_64+0x72/0x2e0 arch/x86/entry/common.c:296 [<0000000003d1e05e>] entry_SYSCALL_64_after_hwframe+0x49/0xbe This is because in sctp_do_bind, if sctp_get_port_local is to create hash bucket successfully, and sctp_add_bind_addr failed to bind address, e.g return -ENOMEM, so memory leak found, it needs to destroy allocated bucket. Reported-by: Hulk Robot Signed-off-by: Mao Wenan Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5fe26ca6edbd..b083d4e66230 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -412,11 +412,13 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) ret = sctp_add_bind_addr(bp, addr, af->sockaddr_len, SCTP_ADDR_SRC, GFP_ATOMIC); - /* Copy back into socket for getsockname() use. */ - if (!ret) { - inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num); - sp->pf->to_sk_saddr(addr, sk); + if (ret) { + sctp_put_port(sk); + return ret; } + /* Copy back into socket for getsockname() use. */ + inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num); + sp->pf->to_sk_saddr(addr, sk); return ret; } From 4d7ffcf3bf1be98d876c570cab8fc31d9fa92725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 12 Sep 2019 10:42:00 +0200 Subject: [PATCH 164/169] cdc_ether: fix rndis support for Mediatek based smartphones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A Mediatek based smartphone owner reports problems with USB tethering in Linux. The verbose USB listing shows a rndis_host interface pair (e0/01/03 + 10/00/00), but the driver fails to bind with [ 355.960428] usb 1-4: bad CDC descriptors The problem is a failsafe test intended to filter out ACM serial functions using the same 02/02/ff class/subclass/protocol as RNDIS. The serial functions are recognized by their non-zero bmCapabilities. No RNDIS function with non-zero bmCapabilities were known at the time this failsafe was added. But it turns out that some Wireless class RNDIS functions are using the bmCapabilities field. These functions are uniquely identified as RNDIS by their class/subclass/protocol, so the failing test can safely be disabled. The same applies to the two types of Misc class RNDIS functions. Applying the failsafe to Communication class functions only retains the original functionality, and fixes the problem for the Mediatek based smartphone. Tow examples of CDC functional descriptors with non-zero bmCapabilities from Wireless class RNDIS functions are: 0e8d:000a Mediatek Crosscall Spider X5 3G Phone CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x0f connection notifications sends break line coding and serial state get/set/clear comm features CDC Union: bMasterInterface 0 bSlaveInterface 1 CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 1 and 19d2:1023 ZTE K4201-z CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x02 line coding and serial state CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 1 CDC Union: bMasterInterface 0 bSlaveInterface 1 The Mediatek example is believed to apply to most smartphones with Mediatek firmware. The ZTE example is most likely also part of a larger family of devices/firmwares. Suggested-by: Lars Melin Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 8458e88c18e9..32f53de5b1fe 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -206,7 +206,15 @@ int usbnet_generic_cdc_bind(struct usbnet *dev, struct usb_interface *intf) goto bad_desc; } skip: - if (rndis && header.usb_cdc_acm_descriptor && + /* Communcation class functions with bmCapabilities are not + * RNDIS. But some Wireless class RNDIS functions use + * bmCapabilities for their own purpose. The failsafe is + * therefore applied only to Communication class RNDIS + * functions. The rndis test is redundant, but a cheap + * optimization. + */ + if (rndis && is_rndis(&intf->cur_altsetting->desc) && + header.usb_cdc_acm_descriptor && header.usb_cdc_acm_descriptor->bmCapabilities) { dev_dbg(&intf->dev, "ACM capabilities %02x, not really RNDIS?\n", From 474efecb65dceb15f793b6e2f2b226e952f0f8e9 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Fri, 13 Sep 2019 18:35:50 -0700 Subject: [PATCH 165/169] riscv: modify the Image header to improve compatibility with the ARM64 header Part of the intention during the definition of the RISC-V kernel image header was to lay the groundwork for a future merge with the ARM64 image header. One error during my original review was not noticing that the RISC-V header's "magic" field was at a different size and position than the ARM64's "magic" field. If the existing ARM64 Image header parsing code were to attempt to parse an existing RISC-V kernel image header format, it would see a magic number 0. This is undesirable, since it's our intention to align as closely as possible with the ARM64 header format. Another problem was that the original "res3" field was not being initialized correctly to zero. Address these issues by creating a 32-bit "magic2" field in the RISC-V header which matches the ARM64 "magic" field. RISC-V binaries will store "RSC\x05" in this field. The intention is that the use of the existing 64-bit "magic" field in the RISC-V header will be deprecated over time. Increment the minor version number of the file format to indicate this change, and update the documentation accordingly. Fix the assembler directives in head.S to ensure that reserved fields are properly zero-initialized. Signed-off-by: Paul Walmsley Reported-by: Palmer Dabbelt Reviewed-by: Palmer Dabbelt Cc: Atish Patra Cc: Karsten Merker Link: https://lore.kernel.org/linux-riscv/194c2f10c9806720623430dbf0cc59a965e50448.camel@wdc.com/T/#u Link: https://lore.kernel.org/linux-riscv/mhng-755b14c4-8f35-4079-a7ff-e421fd1b02bc@palmer-si-x1e/T/#t --- Documentation/riscv/boot-image-header.txt | 13 +++++++------ arch/riscv/include/asm/image.h | 12 ++++++------ arch/riscv/kernel/head.S | 4 ++-- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/Documentation/riscv/boot-image-header.txt b/Documentation/riscv/boot-image-header.txt index 1b73fea23b39..14b1492f689b 100644 --- a/Documentation/riscv/boot-image-header.txt +++ b/Documentation/riscv/boot-image-header.txt @@ -18,7 +18,7 @@ The following 64-byte header is present in decompressed Linux kernel image. u32 res1 = 0; /* Reserved */ u64 res2 = 0; /* Reserved */ u64 magic = 0x5643534952; /* Magic number, little endian, "RISCV" */ - u32 res3; /* Reserved for additional RISC-V specific header */ + u32 magic2 = 0x56534905; /* Magic number 2, little endian, "RSC\x05" */ u32 res4; /* Reserved for PE COFF offset */ This header format is compliant with PE/COFF header and largely inspired from @@ -37,13 +37,14 @@ Notes: Bits 16:31 - Major version This preserves compatibility across newer and older version of the header. - The current version is defined as 0.1. + The current version is defined as 0.2. -- res3 is reserved for offset to any other additional fields. This makes the - header extendible in future. One example would be to accommodate ISA - extension for RISC-V in future. For current version, it is set to be zero. +- The "magic" field is deprecated as of version 0.2. In a future + release, it may be removed. This originally should have matched up + with the ARM64 header "magic" field, but unfortunately does not. + The "magic2" field replaces it, matching up with the ARM64 header. -- In current header, the flag field has only one field. +- In current header, the flags field has only one field. Bit 0: Kernel endianness. 1 if BE, 0 if LE. - Image size is mandatory for boot loader to load kernel image. Booting will diff --git a/arch/riscv/include/asm/image.h b/arch/riscv/include/asm/image.h index ef28e106f247..344db5244547 100644 --- a/arch/riscv/include/asm/image.h +++ b/arch/riscv/include/asm/image.h @@ -3,7 +3,8 @@ #ifndef __ASM_IMAGE_H #define __ASM_IMAGE_H -#define RISCV_IMAGE_MAGIC "RISCV" +#define RISCV_IMAGE_MAGIC "RISCV\0\0\0" +#define RISCV_IMAGE_MAGIC2 "RSC\x05" #define RISCV_IMAGE_FLAG_BE_SHIFT 0 #define RISCV_IMAGE_FLAG_BE_MASK 0x1 @@ -23,7 +24,7 @@ #define __HEAD_FLAGS (__HEAD_FLAG(BE)) #define RISCV_HEADER_VERSION_MAJOR 0 -#define RISCV_HEADER_VERSION_MINOR 1 +#define RISCV_HEADER_VERSION_MINOR 2 #define RISCV_HEADER_VERSION (RISCV_HEADER_VERSION_MAJOR << 16 | \ RISCV_HEADER_VERSION_MINOR) @@ -39,9 +40,8 @@ * @version: version * @res1: reserved * @res2: reserved - * @magic: Magic number - * @res3: reserved (will be used for additional RISC-V specific - * header) + * @magic: Magic number (RISC-V specific; deprecated) + * @magic2: Magic number 2 (to match the ARM64 'magic' field pos) * @res4: reserved (will be used for PE COFF offset) * * The intention is for this header format to be shared between multiple @@ -58,7 +58,7 @@ struct riscv_image_header { u32 res1; u64 res2; u64 magic; - u32 res3; + u32 magic2; u32 res4; }; #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 0f1ba17e476f..52eec0c1bf30 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -39,9 +39,9 @@ ENTRY(_start) .word RISCV_HEADER_VERSION .word 0 .dword 0 - .asciz RISCV_IMAGE_MAGIC - .word 0 + .ascii RISCV_IMAGE_MAGIC .balign 4 + .ascii RISCV_IMAGE_MAGIC2 .word 0 .global _start_kernel From f7eea636c3d505fe6f1d1066234f1aaf7171b681 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 14 Sep 2019 00:26:27 +0200 Subject: [PATCH 166/169] KVM: nVMX: handle page fault in vmread The implementation of vmread to memory is still incomplete, as it lacks the ability to do vmread to I/O memory just like vmptrst. Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ced9fba32598..a3cba321b5c5 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4540,6 +4540,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) int len; gva_t gva = 0; struct vmcs12 *vmcs12; + struct x86_exception e; short offset; if (!nested_vmx_check_permission(vcpu)) @@ -4588,7 +4589,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu) vmx_instruction_info, true, len, &gva)) return 1; /* _system ok, nested_vmx_check_permission has verified cpl=0 */ - kvm_write_guest_virt_system(vcpu, gva, &field_value, len, NULL); + if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) + kvm_inject_page_fault(vcpu, &e); } return nested_vmx_succeed(vcpu); From 541ab2aeb28251bf7135c7961f3a6080eebcc705 Mon Sep 17 00:00:00 2001 From: Fuqian Huang Date: Thu, 12 Sep 2019 12:18:17 +0800 Subject: [PATCH 167/169] KVM: x86: work around leak of uninitialized stack contents Emulation of VMPTRST can incorrectly inject a page fault when passed an operand that points to an MMIO address. The page fault will use uninitialized kernel stack memory as the CR2 and error code. The right behavior would be to abort the VM with a KVM_EXIT_INTERNAL_ERROR exit to userspace; however, it is not an easy fix, so for now just ensure that the error code and CR2 are zero. Signed-off-by: Fuqian Huang Cc: stable@vger.kernel.org [add comment] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 290c3c3efb87..91602d310a3f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5312,6 +5312,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, /* kvm_write_guest_virt_system can pull in tons of pages. */ vcpu->arch.l1tf_flush_l1d = true; + /* + * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED + * is returned, but our callers are not ready for that and they blindly + * call kvm_inject_page_fault. Ensure that they at least do not leak + * uninitialized kernel stack memory into cr2 and error code. + */ + memset(exception, 0, sizeof(*exception)); return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, PFERR_WRITE_MASK, exception); } From 002c5f73c508f7df5681bda339831c27f3c1aef4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Sep 2019 19:46:02 -0700 Subject: [PATCH 168/169] KVM: x86/mmu: Reintroduce fast invalidate/zap for flushing memslot James Harvey reported a livelock that was introduced by commit d012a06ab1d23 ("Revert "KVM: x86/mmu: Zap only the relevant pages when removing a memslot""). The livelock occurs because kvm_mmu_zap_all() as it exists today will voluntarily reschedule and drop KVM's mmu_lock, which allows other vCPUs to add shadow pages. With enough vCPUs, kvm_mmu_zap_all() can get stuck in an infinite loop as it can never zap all pages before observing lock contention or the need to reschedule. The equivalent of kvm_mmu_zap_all() that was in use at the time of the reverted commit (4e103134b8623, "KVM: x86/mmu: Zap only the relevant pages when removing a memslot") employed a fast invalidate mechanism and was not susceptible to the above livelock. There are three ways to fix the livelock: - Reverting the revert (commit d012a06ab1d23) is not a viable option as the revert is needed to fix a regression that occurs when the guest has one or more assigned devices. It's unlikely we'll root cause the device assignment regression soon enough to fix the regression timely. - Remove the conditional reschedule from kvm_mmu_zap_all(). However, although removing the reschedule would be a smaller code change, it's less safe in the sense that the resulting kvm_mmu_zap_all() hasn't been used in the wild for flushing memslots since the fast invalidate mechanism was introduced by commit 6ca18b6950f8d ("KVM: x86: use the fast way to invalidate all pages"), back in 2013. - Reintroduce the fast invalidate mechanism and use it when zapping shadow pages in response to a memslot being deleted/moved, which is what this patch does. For all intents and purposes, this is a revert of commit ea145aacf4ae8 ("Revert "KVM: MMU: fast invalidate all pages"") and a partial revert of commit 7390de1e99a70 ("Revert "KVM: x86: use the fast way to invalidate all pages""), i.e. restores the behavior of commit 5304b8d37c2a5 ("KVM: MMU: fast invalidate all pages") and commit 6ca18b6950f8d ("KVM: x86: use the fast way to invalidate all pages") respectively. Fixes: d012a06ab1d23 ("Revert "KVM: x86/mmu: Zap only the relevant pages when removing a memslot"") Reported-by: James Harvey Cc: Alex Willamson Cc: Paolo Bonzini Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 + arch/x86/kvm/mmu.c | 101 +++++++++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 74e88e5edd9c..bdc16b0aa7c6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -335,6 +335,7 @@ struct kvm_mmu_page { int root_count; /* Currently serving as active root */ unsigned int unsync_children; struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ + unsigned long mmu_valid_gen; DECLARE_BITMAP(unsync_child_bitmap, 512); #ifdef CONFIG_X86_32 @@ -856,6 +857,7 @@ struct kvm_arch { unsigned long n_requested_mmu_pages; unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; + unsigned long mmu_valid_gen; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* * Hash table of struct kvm_mmu_page. diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 218b277bfda3..a63964e7cec7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2095,6 +2095,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct if (!direct) sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); + + /* + * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages() + * depends on valid pages being added to the head of the list. See + * comments in kvm_zap_obsolete_pages(). + */ list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); kvm_mod_used_mmu_pages(vcpu->kvm, +1); return sp; @@ -2244,7 +2250,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, #define for_each_valid_sp(_kvm, _sp, _gfn) \ hlist_for_each_entry(_sp, \ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ - if ((_sp)->role.invalid) { \ + if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) { \ } else #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ @@ -2301,6 +2307,11 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { } static void mmu_audit_disable(void) { } #endif +static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); +} + static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { @@ -2525,6 +2536,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, if (level > PT_PAGE_TABLE_LEVEL && need_sync) flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); } + sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; clear_page(sp->spt); trace_kvm_mmu_get_page(sp, true); @@ -4233,6 +4245,13 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, return false; if (cached_root_available(vcpu, new_cr3, new_role)) { + /* + * It is possible that the cached previous root page is + * obsolete because of a change in the MMU generation + * number. However, changing the generation number is + * accompanied by KVM_REQ_MMU_RELOAD, which will free + * the root set here and allocate a new one. + */ kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); if (!skip_tlb_flush) { kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); @@ -5649,11 +5668,89 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) return alloc_mmu_pages(vcpu); } + +static void kvm_zap_obsolete_pages(struct kvm *kvm) +{ + struct kvm_mmu_page *sp, *node; + LIST_HEAD(invalid_list); + int ign; + +restart: + list_for_each_entry_safe_reverse(sp, node, + &kvm->arch.active_mmu_pages, link) { + /* + * No obsolete valid page exists before a newly created page + * since active_mmu_pages is a FIFO list. + */ + if (!is_obsolete_sp(kvm, sp)) + break; + + /* + * Do not repeatedly zap a root page to avoid unnecessary + * KVM_REQ_MMU_RELOAD, otherwise we may not be able to + * progress: + * vcpu 0 vcpu 1 + * call vcpu_enter_guest(): + * 1): handle KVM_REQ_MMU_RELOAD + * and require mmu-lock to + * load mmu + * repeat: + * 1): zap root page and + * send KVM_REQ_MMU_RELOAD + * + * 2): if (cond_resched_lock(mmu-lock)) + * + * 2): hold mmu-lock and load mmu + * + * 3): see KVM_REQ_MMU_RELOAD bit + * on vcpu->requests is set + * then return 1 to call + * vcpu_enter_guest() again. + * goto repeat; + * + * Since we are reversely walking the list and the invalid + * list will be moved to the head, skip the invalid page + * can help us to avoid the infinity list walking. + */ + if (sp->role.invalid) + continue; + + if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { + kvm_mmu_commit_zap_page(kvm, &invalid_list); + cond_resched_lock(&kvm->mmu_lock); + goto restart; + } + + if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) + goto restart; + } + + kvm_mmu_commit_zap_page(kvm, &invalid_list); +} + +/* + * Fast invalidate all shadow pages and use lock-break technique + * to zap obsolete pages. + * + * It's required when memslot is being deleted or VM is being + * destroyed, in these cases, we should ensure that KVM MMU does + * not use any resource of the being-deleted slot or all slots + * after calling the function. + */ +static void kvm_mmu_zap_all_fast(struct kvm *kvm) +{ + spin_lock(&kvm->mmu_lock); + kvm->arch.mmu_valid_gen++; + + kvm_zap_obsolete_pages(kvm); + spin_unlock(&kvm->mmu_lock); +} + static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, struct kvm_page_track_notifier_node *node) { - kvm_mmu_zap_all(kvm); + kvm_mmu_zap_all_fast(kvm); } void kvm_mmu_init_vm(struct kvm *kvm) From 0d4a3f2abbef73b9e5bb5f12213c275565473588 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sat, 14 Sep 2019 15:21:51 -0400 Subject: [PATCH 169/169] Revert "vhost: block speculation of translated descriptors" This reverts commit a89db445fbd7f1f8457b03759aa7343fa530ef6b. I was hasty to include this patch, and it breaks the build on 32 bit. Defence in depth is good but let's do it properly. Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index acabf20b069e..36ca2cf419bf 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2071,10 +2071,8 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, _iov = iov + ret; size = node->size - addr + node->start; _iov->iov_len = min((u64)len - s, size); - _iov->iov_base = (void __user *) - ((unsigned long)node->userspace_addr + - array_index_nospec((unsigned long)(addr - node->start), - node->size)); + _iov->iov_base = (void __user *)(unsigned long) + (node->userspace_addr + addr - node->start); s += size; addr += size; ++ret;