linux_dsm_epyc7002/drivers/base/regmap/regcache-rbtree.c

555 lines
14 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
//
// Register cache access API - rbtree caching support
//
// Copyright 2011 Wolfson Microelectronics plc
//
// Author: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/rbtree.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include "internal.h"
/*
 * Forward declarations: regcache_rbtree_init() calls both of these before
 * their definitions appear further down in this file.
 */
static int regcache_rbtree_write(struct regmap *map, unsigned int reg,
unsigned int value);
static int regcache_rbtree_exit(struct regmap *map);
/*
 * One block of adjacent cached registers, linked into the cache's rbtree.
 *
 * The block covers the registers from base_reg up to
 * base_reg + (blklen - 1) * map->reg_stride.
 */
struct regcache_rbtree_node {
	/* block of adjacent registers */
	void *block;
	/*
	 * Which registers are present: one bit per slot of @block.
	 * Declared unsigned long because it is only ever handed to the
	 * kernel bit/bitmap helpers (set_bit(), test_bit(),
	 * bitmap_shift_left()), which operate on unsigned long words.
	 */
	unsigned long *cache_present;
	/* base register handled by this block */
	unsigned int base_reg;
	/* number of registers available in the block */
	unsigned int blklen;
	/* the actual rbtree node holding this block */
	struct rb_node node;
};
/*
 * State of one rbtree register cache; the functions in this file store a
 * pointer to it in map->cache.
 */
struct regcache_rbtree_ctx {
/* root of the rbtree of register blocks */
struct rb_root root;
/*
 * Node found by the most recent successful tree search in
 * regcache_rbtree_lookup(); it is tried first on the next lookup.
 */
struct regcache_rbtree_node *cached_rbnode;
};
/*
 * Report the register range covered by @rbnode.
 *
 * @map:    register map; supplies the register stride
 * @rbnode: node to inspect
 * @base:   receives the first register of the block
 * @top:    receives the last register of the block
 */
static inline void regcache_rbtree_get_base_top_reg(
	struct regmap *map,
	struct regcache_rbtree_node *rbnode,
	unsigned int *base, unsigned int *top)
{
	const unsigned int first = rbnode->base_reg;
	/* distance, in register addresses, from the first to the last slot */
	const unsigned int span = (rbnode->blklen - 1) * map->reg_stride;

	*base = first;
	*top = first + span;
}
/*
 * Fetch the cached value stored in slot @idx of @rbnode's register block.
 *
 * The present bitmap is not consulted here; callers check it themselves.
 */
static unsigned int regcache_rbtree_get_register(struct regmap *map,
	struct regcache_rbtree_node *rbnode, unsigned int idx)
{
	void *slots = rbnode->block;

	return regcache_get_val(map, slots, idx);
}
/*
 * Store @val in slot @idx of @rbnode's register block and flag that slot as
 * present in the node's bitmap.
 */
static void regcache_rbtree_set_register(struct regmap *map,
					 struct regcache_rbtree_node *rbnode,
					 unsigned int idx, unsigned int val)
{
	void *slots = rbnode->block;

	/* mark the slot as populated, then write the value itself */
	set_bit(idx, rbnode->cache_present);
	regcache_set_val(map, slots, idx, val);
}
/*
 * Find the node whose register range contains @reg.
 *
 * The node remembered from the previous successful tree search is checked
 * first; on a miss the tree is searched from the root and a hit is
 * remembered for next time.
 *
 * Returns the matching node, or NULL if no node covers @reg.
 */
static struct regcache_rbtree_node *regcache_rbtree_lookup(struct regmap *map,
							   unsigned int reg)
{
	struct regcache_rbtree_ctx *ctx = map->cache;
	struct regcache_rbtree_node *candidate = ctx->cached_rbnode;
	struct rb_node *cursor;
	unsigned int lo, hi;

	/* fast path: same node as last time */
	if (candidate) {
		regcache_rbtree_get_base_top_reg(map, candidate, &lo, &hi);
		if (lo <= reg && reg <= hi)
			return candidate;
	}

	/* slow path: binary search over the tree */
	for (cursor = ctx->root.rb_node; cursor; ) {
		candidate = rb_entry(cursor, struct regcache_rbtree_node, node);
		regcache_rbtree_get_base_top_reg(map, candidate, &lo, &hi);

		if (reg > hi) {
			cursor = cursor->rb_right;
			continue;
		}
		if (reg < lo) {
			cursor = cursor->rb_left;
			continue;
		}

		/* lo <= reg <= hi: remember the hit and hand it back */
		ctx->cached_rbnode = candidate;
		return candidate;
	}

	return NULL;
}
/*
 * Link @rbnode into the tree rooted at @root, ordered by register range.
 *
 * Returns 1 if the node was inserted, or 0 (tree untouched) if its base
 * register already falls inside the range of an existing node.
 */
static int regcache_rbtree_insert(struct regmap *map, struct rb_root *root,
				  struct regcache_rbtree_node *rbnode)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;

	while (*link) {
		struct regcache_rbtree_node *cur =
			rb_entry(*link, struct regcache_rbtree_node, node);
		/* base register of the rbnode to be added */
		unsigned int new_base = rbnode->base_reg;
		/* base and top registers of the node being visited */
		unsigned int cur_base, cur_top;

		regcache_rbtree_get_base_top_reg(map, cur, &cur_base, &cur_top);
		parent = *link;

		if (new_base > cur_top)
			link = &parent->rb_right;
		else if (new_base < cur_base)
			link = &parent->rb_left;
		else
			/* this register has already been inserted */
			return 0;
	}

	/* hook the node in at the leaf position found, then rebalance */
	rb_link_node(&rbnode->node, parent, link);
	rb_insert_color(&rbnode->node, root);

	return 1;
}
#ifdef CONFIG_DEBUG_FS
/*
 * seq_file show callback for the "rbtree" debugfs file.
 *
 * With the map lock held, prints one "base-top (count)" line per node,
 * followed by a summary line with the node count, register count, average
 * registers per node and the memory accounted to the cache.
 *
 * Always returns 0.
 */
static int rbtree_show(struct seq_file *s, void *ignored)
{
	struct regmap *map = s->private;
	struct regcache_rbtree_ctx *ctx = map->cache;
	struct rb_node *it;
	size_t mem_size;
	int nodes = 0;
	int registers = 0;
	int average;

	map->lock(map->lock_arg);

	/* start the accounting with the context structure itself */
	mem_size = sizeof(*ctx);

	it = rb_first(&ctx->root);
	while (it != NULL) {
		struct regcache_rbtree_node *n =
			rb_entry(it, struct regcache_rbtree_node, node);
		unsigned int base, top;
		int this_registers;

		/* node header + register block + present bitmap */
		mem_size += sizeof(*n);
		mem_size += (n->blklen * map->cache_word_size);
		mem_size += BITS_TO_LONGS(n->blklen) * sizeof(long);

		regcache_rbtree_get_base_top_reg(map, n, &base, &top);
		this_registers = ((top - base) / map->reg_stride) + 1;
		seq_printf(s, "%x-%x (%d)\n", base, top, this_registers);

		nodes++;
		registers += this_registers;

		it = rb_next(it);
	}

	/* guard the division for an empty tree */
	average = nodes ? registers / nodes : 0;

	seq_printf(s, "%d nodes, %d registers, average %d registers, used %zu bytes\n",
		   nodes, registers, average, mem_size);

	map->unlock(map->lock_arg);

	return 0;
}
/*
 * Instantiate the seq_file boilerplate for rbtree_show(); this is what
 * provides the rbtree_fops object referenced below.
 */
DEFINE_SHOW_ATTRIBUTE(rbtree);
/*
 * Create the "rbtree" debugfs file (mode 0400) under the map's debugfs
 * directory. @map is passed as the file's private data, which rbtree_show()
 * reads back through s->private.
 */
static void rbtree_debugfs_init(struct regmap *map)
{
debugfs_create_file("rbtree", 0400, map->debugfs, map, &rbtree_fops);
}
#endif
/*
 * Set up the rbtree cache for @map and seed it with the register defaults.
 *
 * Allocates the cache context, stores it in map->cache and writes every
 * entry of map->reg_defaults into the cache. If any of those writes fails,
 * the partially built cache is torn down again.
 *
 * Returns 0 on success, -ENOMEM if the context cannot be allocated, or the
 * error returned by regcache_rbtree_write().
 */
static int regcache_rbtree_init(struct regmap *map)
{
	struct regcache_rbtree_ctx *ctx;
	int idx;
	int err;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	map->cache = ctx;
	if (!ctx)
		return -ENOMEM;

	ctx->root = RB_ROOT;
	ctx->cached_rbnode = NULL;

	for (idx = 0; idx < map->num_reg_defaults; idx++) {
		err = regcache_rbtree_write(map,
					    map->reg_defaults[idx].reg,
					    map->reg_defaults[idx].def);
		if (err) {
			/* undo everything built so far */
			regcache_rbtree_exit(map);
			return err;
		}
	}

	return 0;
}
/*
 * Tear down the rbtree cache of @map.
 *
 * Frees every node (its present bitmap, its register block and the node
 * itself), then the context, and clears map->cache. Calling it again after
 * that is a no-op.
 *
 * Always returns 0.
 */
static int regcache_rbtree_exit(struct regmap *map)
{
	struct regcache_rbtree_ctx *ctx = map->cache;
	struct rb_node *cursor;

	/* if we've already been called then just return */
	if (!ctx)
		return 0;

	/* free up the rbtree */
	for (cursor = rb_first(&ctx->root); cursor; ) {
		struct regcache_rbtree_node *victim =
			rb_entry(cursor, struct regcache_rbtree_node, node);

		/* step to the successor before the current node goes away */
		cursor = rb_next(cursor);

		rb_erase(&victim->node, &ctx->root);
		kfree(victim->cache_present);
		kfree(victim->block);
		kfree(victim);
	}

	/* release the resources */
	kfree(ctx);
	map->cache = NULL;

	return 0;
}
/*
 * Read the cached value of @reg into @value.
 *
 * Returns 0 on success, or -ENOENT if no node covers @reg or the covering
 * node has no value recorded for it. @value is left untouched on failure.
 */
static int regcache_rbtree_read(struct regmap *map,
				unsigned int reg, unsigned int *value)
{
	struct regcache_rbtree_node *rbnode;
	unsigned int slot;

	rbnode = regcache_rbtree_lookup(map, reg);
	if (!rbnode)
		return -ENOENT;

	/* index of @reg within the node's block */
	slot = (reg - rbnode->base_reg) / map->reg_stride;
	if (!test_bit(slot, rbnode->cache_present))
		return -ENOENT;

	*value = regcache_rbtree_get_register(map, rbnode, slot);

	return 0;
}
static int regcache_rbtree_insert_to_block(struct regmap *map,
struct regcache_rbtree_node *rbnode,
regmap: rbtree: Reduce number of nodes, take 2 Support for reducing the number of nodes and memory consumption of the rbtree cache by allowing for small unused holes in the node's register cache block was initially added in commit 0c7ed856 ("regmap: Cut down on the average # of nodes in the rbtree cache"). But the commit had problems and so its effect was reverted again in commit 4e67fb5 ("regmap: rbtree: Fix overlapping rbnodes."). This patch brings the feature back of reducing the average number of nodes, which will speedup node look-up, while at the same time also reducing the memory usage of the rbtree cache. This patch takes a slightly different approach than the original patch though. It modifies the adjacent node look-up to not only consider nodes that are just one to the left or the right of the register but any node that falls in a certain range around the register. The range is calculated based on how much memory it would take to allocate a new node compared to how much memory it takes adding a set of unused registers to an existing node. E.g. if a node takes up 24 bytes and each register in a block uses 1 byte the range will be from the register address - 24 to the register address + 24. If we find a node that falls within this range it is cheaper or as expensive to add the register to the existing node and have a couple of unused registers in the node's cache compared to allocating a new node. Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@linaro.org>
2013-08-29 15:26:33 +07:00
unsigned int base_reg,
unsigned int top_reg,
unsigned int reg,
unsigned int value)
{
regmap: rbtree: Reduce number of nodes, take 2 Support for reducing the number of nodes and memory consumption of the rbtree cache by allowing for small unused holes in the node's register cache block was initially added in commit 0c7ed856 ("regmap: Cut down on the average # of nodes in the rbtree cache"). But the commit had problems and so its effect was reverted again in commit 4e67fb5 ("regmap: rbtree: Fix overlapping rbnodes."). This patch brings the feature back of reducing the average number of nodes, which will speedup node look-up, while at the same time also reducing the memory usage of the rbtree cache. This patch takes a slightly different approach than the original patch though. It modifies the adjacent node look-up to not only consider nodes that are just one to the left or the right of the register but any node that falls in a certain range around the register. The range is calculated based on how much memory it would take to allocate a new node compared to how much memory it takes adding a set of unused registers to an existing node. E.g. if a node takes up 24 bytes and each register in a block uses 1 byte the range will be from the register address - 24 to the register address + 24. If we find a node that falls within this range it is cheaper or as expensive to add the register to the existing node and have a couple of unused registers in the node's cache compared to allocating a new node. Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@linaro.org>
2013-08-29 15:26:33 +07:00
unsigned int blklen;
unsigned int pos, offset;
unsigned long *present;
u8 *blk;
regmap: rbtree: Reduce number of nodes, take 2 Support for reducing the number of nodes and memory consumption of the rbtree cache by allowing for small unused holes in the node's register cache block was initially added in commit 0c7ed856 ("regmap: Cut down on the average # of nodes in the rbtree cache"). But the commit had problems and so its effect was reverted again in commit 4e67fb5 ("regmap: rbtree: Fix overlapping rbnodes."). This patch brings the feature back of reducing the average number of nodes, which will speedup node look-up, while at the same time also reducing the memory usage of the rbtree cache. This patch takes a slightly different approach than the original patch though. It modifies the adjacent node look-up to not only consider nodes that are just one to the left or the right of the register but any node that falls in a certain range around the register. The range is calculated based on how much memory it would take to allocate a new node compared to how much memory it takes adding a set of unused registers to an existing node. E.g. if a node takes up 24 bytes and each register in a block uses 1 byte the range will be from the register address - 24 to the register address + 24. If we find a node that falls within this range it is cheaper or as expensive to add the register to the existing node and have a couple of unused registers in the node's cache compared to allocating a new node. Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@linaro.org>
2013-08-29 15:26:33 +07:00
blklen = (top_reg - base_reg) / map->reg_stride + 1;
pos = (reg - base_reg) / map->reg_stride;
offset = (rbnode->base_reg - base_reg) / map->reg_stride;
blk = krealloc(rbnode->block,
regmap: rbtree: Reduce number of nodes, take 2 Support for reducing the number of nodes and memory consumption of the rbtree cache by allowing for small unused holes in the node's register cache block was initially added in commit 0c7ed856 ("regmap: Cut down on the average # of nodes in the rbtree cache"). But the commit had problems and so its effect was reverted again in commit 4e67fb5 ("regmap: rbtree: Fix overlapping rbnodes."). This patch brings the feature back of reducing the average number of nodes, which will speedup node look-up, while at the same time also reducing the memory usage of the rbtree cache. This patch takes a slightly different approach than the original patch though. It modifies the adjacent node look-up to not only consider nodes that are just one to the left or the right of the register but any node that falls in a certain range around the register. The range is calculated based on how much memory it would take to allocate a new node compared to how much memory it takes adding a set of unused registers to an existing node. E.g. if a node takes up 24 bytes and each register in a block uses 1 byte the range will be from the register address - 24 to the register address + 24. If we find a node that falls within this range it is cheaper or as expensive to add the register to the existing node and have a couple of unused registers in the node's cache compared to allocating a new node. Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@linaro.org>
2013-08-29 15:26:33 +07:00
blklen * map->cache_word_size,
GFP_KERNEL);
if (!blk)
return -ENOMEM;
if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) {
present = krealloc(rbnode->cache_present,
BITS_TO_LONGS(blklen) * sizeof(*present),
GFP_KERNEL);
if (!present) {
kfree(blk);
return -ENOMEM;
}
memset(present + BITS_TO_LONGS(rbnode->blklen), 0,
(BITS_TO_LONGS(blklen) - BITS_TO_LONGS(rbnode->blklen))
* sizeof(*present));
} else {
present = rbnode->cache_present;
}
/* insert the register value in the correct place in the rbnode block */
if (pos == 0) {
regmap: rbtree: Reduce number of nodes, take 2 Support for reducing the number of nodes and memory consumption of the rbtree cache by allowing for small unused holes in the node's register cache block was initially added in commit 0c7ed856 ("regmap: Cut down on the average # of nodes in the rbtree cache"). But the commit had problems and so its effect was reverted again in commit 4e67fb5 ("regmap: rbtree: Fix overlapping rbnodes."). This patch brings the feature back of reducing the average number of nodes, which will speedup node look-up, while at the same time also reducing the memory usage of the rbtree cache. This patch takes a slightly different approach than the original patch though. It modifies the adjacent node look-up to not only consider nodes that are just one to the left or the right of the register but any node that falls in a certain range around the register. The range is calculated based on how much memory it would take to allocate a new node compared to how much memory it takes adding a set of unused registers to an existing node. E.g. if a node takes up 24 bytes and each register in a block uses 1 byte the range will be from the register address - 24 to the register address + 24. If we find a node that falls within this range it is cheaper or as expensive to add the register to the existing node and have a couple of unused registers in the node's cache compared to allocating a new node. Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@linaro.org>
2013-08-29 15:26:33 +07:00
memmove(blk + offset * map->cache_word_size,
blk, rbnode->blklen * map->cache_word_size);
bitmap_shift_left(present, present, offset, blklen);
}
/* update the rbnode block, its size and the base register */
rbnode->block = blk;
regmap: rbtree: Reduce number of nodes, take 2 Support for reducing the number of nodes and memory consumption of the rbtree cache by allowing for small unused holes in the node's register cache block was initially added in commit 0c7ed856 ("regmap: Cut down on the average # of nodes in the rbtree cache"). But the commit had problems and so its effect was reverted again in commit 4e67fb5 ("regmap: rbtree: Fix overlapping rbnodes."). This patch brings the feature back of reducing the average number of nodes, which will speedup node look-up, while at the same time also reducing the memory usage of the rbtree cache. This patch takes a slightly different approach than the original patch though. It modifies the adjacent node look-up to not only consider nodes that are just one to the left or the right of the register but any node that falls in a certain range around the register. The range is calculated based on how much memory it would take to allocate a new node compared to how much memory it takes adding a set of unused registers to an existing node. E.g. if a node takes up 24 bytes and each register in a block uses 1 byte the range will be from the register address - 24 to the register address + 24. If we find a node that falls within this range it is cheaper or as expensive to add the register to the existing node and have a couple of unused registers in the node's cache compared to allocating a new node. Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@linaro.org>
2013-08-29 15:26:33 +07:00
rbnode->blklen = blklen;
rbnode->base_reg = base_reg;
rbnode->cache_present = present;
regcache_rbtree_set_register(map, rbnode, pos, value);
return 0;
}
/*
 * Allocate a new, empty cache node able to hold @reg.
 *
 * If the map has a read table and @reg lies in one of its "yes" ranges, the
 * node is sized to cover that whole range; otherwise it holds just @reg.
 * The register block is left uninitialised and the present bitmap is
 * zeroed. The node is not linked into the tree here.
 *
 * Returns the new node, or NULL if any allocation fails.
 */
static struct regcache_rbtree_node *
regcache_rbtree_node_alloc(struct regmap *map, unsigned int reg)
{
	struct regcache_rbtree_node *fresh;
	const struct regmap_range *match = NULL;
	int idx;

	fresh = kzalloc(sizeof(*fresh), GFP_KERNEL);
	if (!fresh)
		return NULL;

	/* If there is a read table then use it to guess at an allocation */
	if (map->rd_table) {
		for (idx = 0; idx < map->rd_table->n_yes_ranges; idx++) {
			if (regmap_reg_in_range(reg,
					&map->rd_table->yes_ranges[idx])) {
				match = &map->rd_table->yes_ranges[idx];
				break;
			}
		}

		if (match) {
			fresh->blklen = (match->range_max - match->range_min) /
				map->reg_stride + 1;
			fresh->base_reg = match->range_min;
		}
	}

	/* no usable range found: fall back to a single-register block */
	if (!fresh->blklen) {
		fresh->blklen = 1;
		fresh->base_reg = reg;
	}

	fresh->block = kmalloc_array(fresh->blklen, map->cache_word_size,
				     GFP_KERNEL);
	if (!fresh->block) {
		kfree(fresh);
		return NULL;
	}

	fresh->cache_present = kcalloc(BITS_TO_LONGS(fresh->blklen),
				       sizeof(*fresh->cache_present),
				       GFP_KERNEL);
	if (!fresh->cache_present) {
		kfree(fresh->block);
		kfree(fresh);
		return NULL;
	}

	return fresh;
}
static int regcache_rbtree_write(struct regmap *map, unsigned int reg,
unsigned int value)
{
struct regcache_rbtree_ctx *rbtree_ctx;
struct regcache_rbtree_node *rbnode, *rbnode_tmp;
struct rb_node *node;
unsigned int reg_tmp;
int ret;
rbtree_ctx = map->cache;
/* if we can't locate it in the cached rbnode we'll have
* to traverse the rbtree looking for it.
*/
rbnode = regcache_rbtree_lookup(map, reg);
if (rbnode) {
reg_tmp = (reg - rbnode->base_reg) / map->reg_stride;
regcache_rbtree_set_register(map, rbnode, reg_tmp, value);
} else {
regmap: rbtree: Reduce number of nodes, take 2 Support for reducing the number of nodes and memory consumption of the rbtree cache by allowing for small unused holes in the node's register cache block was initially added in commit 0c7ed856 ("regmap: Cut down on the average # of nodes in the rbtree cache"). But the commit had problems and so its effect was reverted again in commit 4e67fb5 ("regmap: rbtree: Fix overlapping rbnodes."). This patch brings the feature back of reducing the average number of nodes, which will speedup node look-up, while at the same time also reducing the memory usage of the rbtree cache. This patch takes a slightly different approach than the original patch though. It modifies the adjacent node look-up to not only consider nodes that are just one to the left or the right of the register but any node that falls in a certain range around the register. The range is calculated based on how much memory it would take to allocate a new node compared to how much memory it takes adding a set of unused registers to an existing node. E.g. if a node takes up 24 bytes and each register in a block uses 1 byte the range will be from the register address - 24 to the register address + 24. If we find a node that falls within this range it is cheaper or as expensive to add the register to the existing node and have a couple of unused registers in the node's cache compared to allocating a new node. Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@linaro.org>
2013-08-29 15:26:33 +07:00
unsigned int base_reg, top_reg;
unsigned int new_base_reg, new_top_reg;
unsigned int min, max;
unsigned int max_dist;
regmap: rbtree: Avoid overlapping nodes When searching for a suitable node that should be used for inserting a new register, which does not fall within the range of any existing node, we not only looks for nodes which are directly adjacent to the new register, but for nodes within a certain proximity. This is done to avoid creating lots of small nodes with just a few registers spacing in between, which would increase memory usage as well as tree traversal time. This means there might be multiple node candidates which fall within the proximity range of the new register. If we choose the first node we encounter, under certain register insertion patterns it is possible to end up with overlapping ranges. This will break order in the rbtree and can cause the cached register value to become corrupted. E.g. take the simplified example where the proximity range is 2 and the register insertion sequence is 1, 4, 2, 3, 5. * Insert of register 1 creates a new node, this is the root of the rbtree * Insert of register 4 creates a new node, which is inserted to the right of the root. * Insert of register 2 gets inserted to the first node * Insert of register 3 gets inserted to the first node * Insert of register 5 also gets inserted into the first node since this is the first node encountered and it is within the proximity range. Now there are two overlapping nodes. To avoid this always choose the node that is closest to the new register. This will ensure that nodes will not overlap. The tree traversal is still done as a binary search, we just don't stop at the first node found. So the complexity of the algorithm stays within the same order. Ideally if a new register is in the range of two adjacent blocks those blocks should be merged, but that is a much more invasive change and left for later. 
The issue was initially introduced in commit 472fdec7380c ("regmap: rbtree: Reduce number of nodes, take 2"), but became much more exposed by commit 6399aea629b0 ("regmap: rbtree: When adding a reg do a bsearch for target node") which changed the order in which nodes are looked-up. Fixes: 6399aea629b0 ("regmap: rbtree: When adding a reg do a bsearch for target node") Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@kernel.org>
2016-08-04 22:22:16 +07:00
unsigned int dist, best_dist = UINT_MAX;
regmap: rbtree: Reduce number of nodes, take 2 Support for reducing the number of nodes and memory consumption of the rbtree cache by allowing for small unused holes in the node's register cache block was initially added in commit 0c7ed856 ("regmap: Cut down on the average # of nodes in the rbtree cache"). But the commit had problems and so its effect was reverted again in commit 4e67fb5 ("regmap: rbtree: Fix overlapping rbnodes."). This patch brings the feature back of reducing the average number of nodes, which will speed up node look-up, while at the same time also reducing the memory usage of the rbtree cache. This patch takes a slightly different approach than the original patch though. It modifies the adjacent node look-up to not only consider nodes that are just one to the left or the right of the register but any node that falls in a certain range around the register. The range is calculated based on how much memory it would take to allocate a new node compared to how much memory it takes adding a set of unused registers to an existing node. E.g. if a node takes up 24 bytes and each register in a block uses 1 byte the range will be from the register address - 24 to the register address + 24. If we find a node that falls within this range it is cheaper or as expensive to add the register to the existing node and have a couple of unused registers in the node's cache compared to allocating a new node. Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@linaro.org>
2013-08-29 15:26:33 +07:00
max_dist = map->reg_stride * sizeof(*rbnode_tmp) /
map->cache_word_size;
if (reg < max_dist)
min = 0;
else
min = reg - max_dist;
max = reg + max_dist;
/* look for an adjacent register to the one we are about to add */
node = rbtree_ctx->root.rb_node;
while (node) {
rbnode_tmp = rb_entry(node, struct regcache_rbtree_node,
node);
regcache_rbtree_get_base_top_reg(map, rbnode_tmp,
&base_reg, &top_reg);
regmap: rbtree: Reduce number of nodes, take 2 Support for reducing the number of nodes and memory consumption of the rbtree cache by allowing for small unused holes in the node's register cache block was initially added in commit 0c7ed856 ("regmap: Cut down on the average # of nodes in the rbtree cache"). But the commit had problems and so its effect was reverted again in commit 4e67fb5 ("regmap: rbtree: Fix overlapping rbnodes."). This patch brings the feature back of reducing the average number of nodes, which will speed up node look-up, while at the same time also reducing the memory usage of the rbtree cache. This patch takes a slightly different approach than the original patch though. It modifies the adjacent node look-up to not only consider nodes that are just one to the left or the right of the register but any node that falls in a certain range around the register. The range is calculated based on how much memory it would take to allocate a new node compared to how much memory it takes adding a set of unused registers to an existing node. E.g. if a node takes up 24 bytes and each register in a block uses 1 byte the range will be from the register address - 24 to the register address + 24. If we find a node that falls within this range it is cheaper or as expensive to add the register to the existing node and have a couple of unused registers in the node's cache compared to allocating a new node. Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@linaro.org>
2013-08-29 15:26:33 +07:00
if (base_reg <= max && top_reg >= min) {
regmap: rbtree: Avoid overlapping nodes When searching for a suitable node that should be used for inserting a new register, which does not fall within the range of any existing node, we not only look for nodes which are directly adjacent to the new register, but also for nodes within a certain proximity. This is done to avoid creating lots of small nodes with just a few registers of spacing in between, which would increase memory usage as well as tree traversal time. This means there might be multiple node candidates which fall within the proximity range of the new register. If we choose the first node we encounter, under certain register insertion patterns it is possible to end up with overlapping ranges. This will break order in the rbtree and can cause the cached register value to become corrupted. E.g. take the simplified example where the proximity range is 2 and the register insertion sequence is 1, 4, 2, 3, 5. * Insert of register 1 creates a new node, this is the root of the rbtree * Insert of register 4 creates a new node, which is inserted to the right of the root. * Insert of register 2 gets inserted to the first node * Insert of register 3 gets inserted to the first node * Insert of register 5 also gets inserted into the first node since this is the first node encountered and it is within the proximity range. Now there are two overlapping nodes. To avoid this, always choose the node that is closest to the new register. This will ensure that nodes will not overlap. The tree traversal is still done as a binary search, we just don't stop at the first node found. So the complexity of the algorithm stays within the same order. Ideally if a new register is in the range of two adjacent blocks those blocks should be merged, but that is a much more invasive change and left for later. 
The issue was initially introduced in commit 472fdec7380c ("regmap: rbtree: Reduce number of nodes, take 2"), but became much more exposed by commit 6399aea629b0 ("regmap: rbtree: When adding a reg do a bsearch for target node") which changed the order in which nodes are looked-up. Fixes: 6399aea629b0 ("regmap: rbtree: When adding a reg do a bsearch for target node") Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@kernel.org>
2016-08-04 22:22:16 +07:00
if (reg < base_reg)
dist = base_reg - reg;
else if (reg > top_reg)
dist = reg - top_reg;
else
regmap: rbtree: Avoid overlapping nodes When searching for a suitable node that should be used for inserting a new register, which does not fall within the range of any existing node, we not only look for nodes which are directly adjacent to the new register, but also for nodes within a certain proximity. This is done to avoid creating lots of small nodes with just a few registers of spacing in between, which would increase memory usage as well as tree traversal time. This means there might be multiple node candidates which fall within the proximity range of the new register. If we choose the first node we encounter, under certain register insertion patterns it is possible to end up with overlapping ranges. This will break order in the rbtree and can cause the cached register value to become corrupted. E.g. take the simplified example where the proximity range is 2 and the register insertion sequence is 1, 4, 2, 3, 5. * Insert of register 1 creates a new node, this is the root of the rbtree * Insert of register 4 creates a new node, which is inserted to the right of the root. * Insert of register 2 gets inserted to the first node * Insert of register 3 gets inserted to the first node * Insert of register 5 also gets inserted into the first node since this is the first node encountered and it is within the proximity range. Now there are two overlapping nodes. To avoid this, always choose the node that is closest to the new register. This will ensure that nodes will not overlap. The tree traversal is still done as a binary search, we just don't stop at the first node found. So the complexity of the algorithm stays within the same order. Ideally if a new register is in the range of two adjacent blocks those blocks should be merged, but that is a much more invasive change and left for later. 
The issue was initially introduced in commit 472fdec7380c ("regmap: rbtree: Reduce number of nodes, take 2"), but became much more exposed by commit 6399aea629b0 ("regmap: rbtree: When adding a reg do a bsearch for target node") which changed the order in which nodes are looked-up. Fixes: 6399aea629b0 ("regmap: rbtree: When adding a reg do a bsearch for target node") Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@kernel.org>
2016-08-04 22:22:16 +07:00
dist = 0;
if (dist < best_dist) {
rbnode = rbnode_tmp;
best_dist = dist;
new_base_reg = min(reg, base_reg);
new_top_reg = max(reg, top_reg);
}
}
regmap: rbtree: Avoid overlapping nodes When searching for a suitable node that should be used for inserting a new register, which does not fall within the range of any existing node, we not only look for nodes which are directly adjacent to the new register, but also for nodes within a certain proximity. This is done to avoid creating lots of small nodes with just a few registers of spacing in between, which would increase memory usage as well as tree traversal time. This means there might be multiple node candidates which fall within the proximity range of the new register. If we choose the first node we encounter, under certain register insertion patterns it is possible to end up with overlapping ranges. This will break order in the rbtree and can cause the cached register value to become corrupted. E.g. take the simplified example where the proximity range is 2 and the register insertion sequence is 1, 4, 2, 3, 5. * Insert of register 1 creates a new node, this is the root of the rbtree * Insert of register 4 creates a new node, which is inserted to the right of the root. * Insert of register 2 gets inserted to the first node * Insert of register 3 gets inserted to the first node * Insert of register 5 also gets inserted into the first node since this is the first node encountered and it is within the proximity range. Now there are two overlapping nodes. To avoid this, always choose the node that is closest to the new register. This will ensure that nodes will not overlap. The tree traversal is still done as a binary search, we just don't stop at the first node found. So the complexity of the algorithm stays within the same order. Ideally if a new register is in the range of two adjacent blocks those blocks should be merged, but that is a much more invasive change and left for later. 
The issue was initially introduced in commit 472fdec7380c ("regmap: rbtree: Reduce number of nodes, take 2"), but became much more exposed by commit 6399aea629b0 ("regmap: rbtree: When adding a reg do a bsearch for target node") which changed the order in which nodes are looked-up. Fixes: 6399aea629b0 ("regmap: rbtree: When adding a reg do a bsearch for target node") Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@kernel.org>
2016-08-04 22:22:16 +07:00
/*
* Keep looking, we want to choose the closest block,
* otherwise we might end up creating overlapping
* blocks, which breaks the rbtree.
*/
if (reg < base_reg)
node = node->rb_left;
else if (reg > top_reg)
node = node->rb_right;
else
break;
}
if (rbnode) {
ret = regcache_rbtree_insert_to_block(map, rbnode,
regmap: rbtree: Reduce number of nodes, take 2 Support for reducing the number of nodes and memory consumption of the rbtree cache by allowing for small unused holes in the node's register cache block was initially added in commit 0c7ed856 ("regmap: Cut down on the average # of nodes in the rbtree cache"). But the commit had problems and so its effect was reverted again in commit 4e67fb5 ("regmap: rbtree: Fix overlapping rbnodes."). This patch brings the feature back of reducing the average number of nodes, which will speed up node look-up, while at the same time also reducing the memory usage of the rbtree cache. This patch takes a slightly different approach than the original patch though. It modifies the adjacent node look-up to not only consider nodes that are just one to the left or the right of the register but any node that falls in a certain range around the register. The range is calculated based on how much memory it would take to allocate a new node compared to how much memory it takes adding a set of unused registers to an existing node. E.g. if a node takes up 24 bytes and each register in a block uses 1 byte the range will be from the register address - 24 to the register address + 24. If we find a node that falls within this range it is cheaper or as expensive to add the register to the existing node and have a couple of unused registers in the node's cache compared to allocating a new node. Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@linaro.org>
2013-08-29 15:26:33 +07:00
new_base_reg,
new_top_reg, reg,
value);
if (ret)
return ret;
regmap: rbtree: Avoid overlapping nodes When searching for a suitable node that should be used for inserting a new register, which does not fall within the range of any existing node, we not only look for nodes which are directly adjacent to the new register, but also for nodes within a certain proximity. This is done to avoid creating lots of small nodes with just a few registers of spacing in between, which would increase memory usage as well as tree traversal time. This means there might be multiple node candidates which fall within the proximity range of the new register. If we choose the first node we encounter, under certain register insertion patterns it is possible to end up with overlapping ranges. This will break order in the rbtree and can cause the cached register value to become corrupted. E.g. take the simplified example where the proximity range is 2 and the register insertion sequence is 1, 4, 2, 3, 5. * Insert of register 1 creates a new node, this is the root of the rbtree * Insert of register 4 creates a new node, which is inserted to the right of the root. * Insert of register 2 gets inserted to the first node * Insert of register 3 gets inserted to the first node * Insert of register 5 also gets inserted into the first node since this is the first node encountered and it is within the proximity range. Now there are two overlapping nodes. To avoid this, always choose the node that is closest to the new register. This will ensure that nodes will not overlap. The tree traversal is still done as a binary search, we just don't stop at the first node found. So the complexity of the algorithm stays within the same order. Ideally if a new register is in the range of two adjacent blocks those blocks should be merged, but that is a much more invasive change and left for later. 
The issue was initially introduced in commit 472fdec7380c ("regmap: rbtree: Reduce number of nodes, take 2"), but became much more exposed by commit 6399aea629b0 ("regmap: rbtree: When adding a reg do a bsearch for target node") which changed the order in which nodes are looked-up. Fixes: 6399aea629b0 ("regmap: rbtree: When adding a reg do a bsearch for target node") Signed-off-by: Lars-Peter Clausen <lars@metafoo.de> Signed-off-by: Mark Brown <broonie@kernel.org>
2016-08-04 22:22:16 +07:00
rbtree_ctx->cached_rbnode = rbnode;
return 0;
}
/* We did not manage to find a place to insert it in
* an existing block so create a new rbnode.
*/
rbnode = regcache_rbtree_node_alloc(map, reg);
if (!rbnode)
return -ENOMEM;
regcache_rbtree_set_register(map, rbnode,
reg - rbnode->base_reg, value);
regcache_rbtree_insert(map, &rbtree_ctx->root, rbnode);
rbtree_ctx->cached_rbnode = rbnode;
}
return 0;
}
/*
 * regcache_rbtree_sync - sync the cached blocks that intersect [min, max]
 * @map: regmap whose rbtree cache is walked
 * @min: lowest register address of the range (inclusive)
 * @max: highest register address of the range (inclusive)
 *
 * Walks the tree with rb_first()/rb_next() and hands the part of every
 * intersecting block to regcache_sync_block().
 *
 * Returns the first non-zero result of regcache_sync_block(), otherwise
 * whatever regmap_async_complete() returns.
 */
static int regcache_rbtree_sync(struct regmap *map, unsigned int min,
				unsigned int max)
{
	struct regcache_rbtree_ctx *ctx = map->cache;
	struct rb_node *it;

	for (it = rb_first(&ctx->root); it; it = rb_next(it)) {
		struct regcache_rbtree_node *blk =
			rb_entry(it, struct regcache_rbtree_node, node);
		unsigned int first_reg, last_reg;
		unsigned int from, to;
		int err;

		regcache_rbtree_get_base_top_reg(map, blk, &first_reg,
						 &last_reg);

		/*
		 * This block starts past the requested range; stop here.
		 * NOTE(review): presumes the walk visits blocks in
		 * ascending register order - confirm against the insert path.
		 */
		if (first_reg > max)
			break;

		/* Block lies entirely below the requested range. */
		if (last_reg < min)
			continue;

		/* Clamp the range to block-relative indices [from, to). */
		from = (min > first_reg) ?
			(min - first_reg) / map->reg_stride : 0;
		to = (max < last_reg) ?
			(max - first_reg) / map->reg_stride + 1 : blk->blklen;

		err = regcache_sync_block(map, blk->block, blk->cache_present,
					  blk->base_reg, from, to);
		if (err != 0)
			return err;
	}

	return regmap_async_complete(map);
}
/*
 * regcache_rbtree_drop - clear the "present" bits for registers in [min, max]
 * @map: regmap whose rbtree cache is walked
 * @min: lowest register address of the range (inclusive)
 * @max: highest register address of the range (inclusive)
 *
 * For every block that intersects the range, the matching slice of its
 * cache_present bitmap is cleared; the block storage itself is not touched
 * here.
 *
 * Always returns 0.
 */
static int regcache_rbtree_drop(struct regmap *map, unsigned int min,
				unsigned int max)
{
	struct regcache_rbtree_ctx *ctx = map->cache;
	struct rb_node *it;

	for (it = rb_first(&ctx->root); it; it = rb_next(it)) {
		struct regcache_rbtree_node *blk =
			rb_entry(it, struct regcache_rbtree_node, node);
		unsigned int first_reg, last_reg;
		unsigned int from, to;

		regcache_rbtree_get_base_top_reg(map, blk, &first_reg,
						 &last_reg);

		/*
		 * This block starts past the requested range; stop here.
		 * NOTE(review): presumes the walk visits blocks in
		 * ascending register order - confirm against the insert path.
		 */
		if (first_reg > max)
			break;

		/* Block lies entirely below the requested range. */
		if (last_reg < min)
			continue;

		/* Clamp the range to block-relative indices [from, to). */
		from = (min > first_reg) ?
			(min - first_reg) / map->reg_stride : 0;
		to = (max < last_reg) ?
			(max - first_reg) / map->reg_stride + 1 : blk->blklen;

		bitmap_clear(blk->cache_present, from, to - from);
	}

	return 0;
}
/*
 * Callback table for the rbtree register cache, tagged with type
 * REGCACHE_RBTREE and the name "rbtree".
 */
struct regcache_ops regcache_rbtree_ops = {
	.name		= "rbtree",
	.type		= REGCACHE_RBTREE,

	/* set-up and tear-down */
	.init		= regcache_rbtree_init,
	.exit		= regcache_rbtree_exit,

	/* cache access */
	.read		= regcache_rbtree_read,
	.write		= regcache_rbtree_write,
	.sync		= regcache_rbtree_sync,
	.drop		= regcache_rbtree_drop,

#ifdef CONFIG_DEBUG_FS
	/* only wired up when debugfs support is built in */
	.debugfs_init	= rbtree_debugfs_init,
#endif
};