mlxsw: spectrum_router: Populate adjacency entries according to weights

Up until now the driver assumed all the nexthops have an equal weight
and wrote each to a single adjacency entry.

This patch takes the `weight` parameter into account and populates the
adjacency group according to the relative weight of each nexthop.

Specifically, the weights of all the nexthops that should be offloaded
are first normalized and then used to calculate the upper adjacency
index of each nexthop. This is done according to the hash-threshold
algorithm used by the kernel for IPv4 multi-path routing.

Adjacency groups are currently limited to 32 entries which limits the
weights that can be used, but follow-up patches will introduce groups of
512 entries.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Ido Schimmel 2017-10-22 23:11:48 +02:00 committed by David S. Miller
parent 425a08c673
commit eb789980d0

View File

@ -46,6 +46,7 @@
#include <linux/if_bridge.h> #include <linux/if_bridge.h>
#include <linux/socket.h> #include <linux/socket.h>
#include <linux/route.h> #include <linux/route.h>
#include <linux/gcd.h>
#include <net/netevent.h> #include <net/netevent.h>
#include <net/neighbour.h> #include <net/neighbour.h>
#include <net/arp.h> #include <net/arp.h>
@ -2204,6 +2205,8 @@ struct mlxsw_sp_nexthop {
unsigned char gw_addr[sizeof(struct in6_addr)]; unsigned char gw_addr[sizeof(struct in6_addr)];
int ifindex; int ifindex;
int nh_weight; int nh_weight;
int norm_nh_weight;
int num_adj_entries;
struct mlxsw_sp_rif *rif; struct mlxsw_sp_rif *rif;
u8 should_offload:1, /* set indicates this neigh is connected and u8 should_offload:1, /* set indicates this neigh is connected and
* should be put to KVD linear area of this group. * should be put to KVD linear area of this group.
@ -2233,6 +2236,7 @@ struct mlxsw_sp_nexthop_group {
u32 adj_index; u32 adj_index;
u16 ecmp_size; u16 ecmp_size;
u16 count; u16 count;
int sum_norm_weight;
struct mlxsw_sp_nexthop nexthops[0]; struct mlxsw_sp_nexthop nexthops[0];
#define nh_rif nexthops[0].rif #define nh_rif nexthops[0].rif
}; };
@ -2318,7 +2322,7 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
if (nh_iter == nh) if (nh_iter == nh)
break; break;
if (nh_iter->offloaded) if (nh_iter->offloaded)
adj_hash_index++; adj_hash_index += nh_iter->num_adj_entries;
} }
*p_adj_hash_index = adj_hash_index; *p_adj_hash_index = adj_hash_index;
@ -2601,7 +2605,7 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
return 0; return 0;
} }
int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_nexthop *nh) struct mlxsw_sp_nexthop *nh)
{ {
struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
@ -2619,7 +2623,23 @@ int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
} }
static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_nexthop *nh)
{
int i;
for (i = 0; i < nh->num_adj_entries; i++) {
int err;
err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
if (err)
return err;
}
return 0;
}
static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
u32 adj_index, u32 adj_index,
struct mlxsw_sp_nexthop *nh) struct mlxsw_sp_nexthop *nh)
{ {
@ -2629,6 +2649,24 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry); return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
} }
static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
u32 adj_index,
struct mlxsw_sp_nexthop *nh)
{
int i;
for (i = 0; i < nh->num_adj_entries; i++) {
int err;
err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
nh);
if (err)
return err;
}
return 0;
}
static int static int
mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop_group *nh_grp,
@ -2663,7 +2701,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
nh->update = 0; nh->update = 0;
nh->offloaded = 1; nh->offloaded = 1;
} }
adj_index++; adj_index += nh->num_adj_entries;
} }
return 0; return 0;
} }
@ -2761,17 +2799,65 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
return 0; return 0;
} }
static void
mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
{
int i, g = 0, sum_norm_weight = 0;
struct mlxsw_sp_nexthop *nh;
for (i = 0; i < nh_grp->count; i++) {
nh = &nh_grp->nexthops[i];
if (!nh->should_offload)
continue;
if (g > 0)
g = gcd(nh->nh_weight, g);
else
g = nh->nh_weight;
}
for (i = 0; i < nh_grp->count; i++) {
nh = &nh_grp->nexthops[i];
if (!nh->should_offload)
continue;
nh->norm_nh_weight = nh->nh_weight / g;
sum_norm_weight += nh->norm_nh_weight;
}
nh_grp->sum_norm_weight = sum_norm_weight;
}
static void
mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
{
int total = nh_grp->sum_norm_weight;
u16 ecmp_size = nh_grp->ecmp_size;
int i, weight = 0, lower_bound = 0;
for (i = 0; i < nh_grp->count; i++) {
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
int upper_bound;
if (!nh->should_offload)
continue;
weight += nh->norm_nh_weight;
upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
nh->num_adj_entries = upper_bound - lower_bound;
lower_bound = upper_bound;
}
}
static void static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp) struct mlxsw_sp_nexthop_group *nh_grp)
{ {
u16 ecmp_size, old_ecmp_size;
struct mlxsw_sp_nexthop *nh; struct mlxsw_sp_nexthop *nh;
bool offload_change = false; bool offload_change = false;
u32 adj_index; u32 adj_index;
u16 ecmp_size = 0;
bool old_adj_index_valid; bool old_adj_index_valid;
u32 old_adj_index; u32 old_adj_index;
u16 old_ecmp_size;
int i; int i;
int err; int err;
@ -2788,8 +2874,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
if (nh->should_offload) if (nh->should_offload)
nh->update = 1; nh->update = 1;
} }
if (nh->should_offload)
ecmp_size++;
} }
if (!offload_change) { if (!offload_change) {
/* Nothing was added or removed, so no need to reallocate. Just /* Nothing was added or removed, so no need to reallocate. Just
@ -2802,12 +2886,14 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
} }
return; return;
} }
if (!ecmp_size) mlxsw_sp_nexthop_group_normalize(nh_grp);
if (!nh_grp->sum_norm_weight)
/* No neigh of this group is connected so we just set /* No neigh of this group is connected so we just set
* the trap and let everthing flow through kernel. * the trap and let everthing flow through kernel.
*/ */
goto set_trap; goto set_trap;
ecmp_size = nh_grp->sum_norm_weight;
err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size); err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
if (err) if (err)
/* No valid allocation size available. */ /* No valid allocation size available. */
@ -2827,6 +2913,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
nh_grp->adj_index_valid = 1; nh_grp->adj_index_valid = 1;
nh_grp->adj_index = adj_index; nh_grp->adj_index = adj_index;
nh_grp->ecmp_size = ecmp_size; nh_grp->ecmp_size = ecmp_size;
mlxsw_sp_nexthop_group_rebalance(nh_grp);
err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true); err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
if (err) { if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");