2005-04-17 05:20:36 +07:00
|
|
|
#ifndef __LINUX_BITMAP_H
|
|
|
|
#define __LINUX_BITMAP_H
|
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/bitops.h>
|
|
|
|
#include <linux/string.h>
|
2007-10-19 13:40:37 +07:00
|
|
|
#include <linux/kernel.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* bitmaps provide bit arrays that consume one or more unsigned
|
|
|
|
* longs. The bitmap interface and available operations are listed
|
|
|
|
* here, in bitmap.h
|
|
|
|
*
|
|
|
|
* Function implementations generic to all architectures are in
|
|
|
|
* lib/bitmap.c. Function implementations that are architecture
|
|
|
|
* specific are in various include/asm-<arch>/bitops.h headers
|
|
|
|
* and other arch/<arch> specific files.
|
|
|
|
*
|
|
|
|
* See lib/bitmap.c for more details.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The available bitmap operations and their rough meaning in the
|
|
|
|
* case that the bitmap is a single unsigned long are thus:
|
|
|
|
*
|
2006-06-26 18:57:10 +07:00
|
|
|
* Note that nbits should always be a compile-time evaluable constant.
|
|
|
|
* Otherwise many inlines will generate horrible code.
|
|
|
|
*
|
2005-04-17 05:20:36 +07:00
|
|
|
* bitmap_zero(dst, nbits) *dst = 0UL
|
|
|
|
* bitmap_fill(dst, nbits) *dst = ~0UL
|
|
|
|
* bitmap_copy(dst, src, nbits) *dst = *src
|
|
|
|
* bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2
|
|
|
|
* bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2
|
|
|
|
* bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2
|
|
|
|
* bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2)
|
|
|
|
* bitmap_complement(dst, src, nbits) *dst = ~(*src)
|
|
|
|
* bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal?
|
|
|
|
* bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap?
|
|
|
|
* bitmap_subset(src1, src2, nbits) Is *src1 a subset of *src2?
|
|
|
|
* bitmap_empty(src, nbits) Are all bits zero in *src?
|
|
|
|
* bitmap_full(src, nbits) Are all bits set in *src?
|
|
|
|
* bitmap_weight(src, nbits) Hamming Weight: number of set bits
|
2009-12-16 07:48:25 +07:00
|
|
|
* bitmap_set(dst, pos, nbits) Set specified bit area
|
|
|
|
* bitmap_clear(dst, pos, nbits) Clear specified bit area
|
|
|
|
* bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area
|
2014-12-13 07:54:45 +07:00
|
|
|
* bitmap_find_next_zero_area_off(buf, len, pos, n, mask) as above
|
2005-04-17 05:20:36 +07:00
|
|
|
* bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n
|
|
|
|
* bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
|
2005-10-31 06:02:33 +07:00
|
|
|
* bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src)
|
|
|
|
* bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit)
|
mempolicy: add bitmap_onto() and bitmap_fold() operations
The following adds two more bitmap operators, bitmap_onto() and bitmap_fold(),
with the usual cpumask and nodemask wrappers.
The bitmap_onto() operator computes one bitmap relative to another. If the
n-th bit in the origin mask is set, then the m-th bit of the destination mask
will be set, where m is the position of the n-th set bit in the relative mask.
The bitmap_fold() operator folds a bitmap into a second that has bit m set iff
the input bitmap has some bit n set, where m == n mod sz, for the specified sz
value.
There are two substantive changes between this patch and its
predecessor bitmap_relative:
1) Renamed bitmap_relative() to be bitmap_onto().
2) Added bitmap_fold().
The essential motivation for bitmap_onto() is to provide a mechanism for
converting a cpuset-relative CPU or Node mask to an absolute mask. Cpuset
relative masks are written as if the current task were in a cpuset whose CPUs
or Nodes were just the consecutive ones numbered 0..N-1, for some N. The
bitmap_onto() operator is provided in anticipation of adding support for the
first such cpuset relative mask, by the mbind() and set_mempolicy() system
calls, using a planned flag of MPOL_F_RELATIVE_NODES. These bitmap operators
(and their nodemask wrappers, in particular) will be used in code that
converts the user specified cpuset relative memory policy to a specific system
node numbered policy, given the current mems_allowed of the tasks cpuset.
Such cpuset relative mempolicies will address two deficiencies
of the existing interface between cpusets and mempolicies:
1) A task cannot at present reliably establish a cpuset
relative mempolicy because there is an essential race
condition, in that the tasks cpuset may be changed in
between the time the task can query its cpuset placement,
and the time the task can issue the applicable mbind or
set_mempolicy system call.
2) A task cannot at present establish what cpuset relative
mempolicy it would like to have, if it is in a smaller
cpuset than it might have mempolicy preferences for,
because the existing interface only allows specifying
mempolicies for nodes currently allowed by the cpuset.
Cpuset relative mempolicies are useful for tasks that don't distinguish
particularly between one CPU or Node and another, but only between how many of
each are allowed, and the proper placement of threads and memory pages on the
various CPUs and Nodes available.
The motivation for the added bitmap_fold() can be seen in the following
example.
Let's say an application has specified some mempolicies that presume 16 memory
nodes, including say a mempolicy that specified MPOL_F_RELATIVE_NODES (cpuset
relative) nodes 12-15. Then lets say that application is crammed into a
cpuset that only has 8 memory nodes, 0-7. If one just uses bitmap_onto(),
this mempolicy, mapped to that cpuset, would ignore the requested relative
nodes above 7, leaving it empty of nodes. That's not good; better to fold the
higher nodes down, so that some nodes are included in the resulting mapped
mempolicy. In this case, the mempolicy nodes 12-15 are taken modulo 8 (the
weight of the mems_allowed of the confining cpuset), resulting in a mempolicy
specifying nodes 4-7.
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: <kosaki.motohiro@jp.fujitsu.com>
Cc: <ray-lk@madrabbit.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 16:12:29 +07:00
|
|
|
* bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap
|
|
|
|
* bitmap_fold(dst, orig, sz, nbits) dst bits = orig bits mod sz
|
2006-10-11 15:21:55 +07:00
|
|
|
* bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf
|
|
|
|
* bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf
|
bitmap, irq: add smp_affinity_list interface to /proc/irq
Manually adjusting the smp_affinity for IRQ's becomes unwieldy when the
cpu count is large.
Setting smp affinity to cpus 256 to 263 would be:
echo 000000ff,00000000,00000000,00000000,00000000,00000000,00000000,00000000 > smp_affinity
instead of:
echo 256-263 > smp_affinity_list
Think about what it looks like for cpus around say, 4088 to 4095.
We already have many alternate "list" interfaces:
/sys/devices/system/cpu/cpuX/indexY/shared_cpu_list
/sys/devices/system/cpu/cpuX/topology/thread_siblings_list
/sys/devices/system/cpu/cpuX/topology/core_siblings_list
/sys/devices/system/node/nodeX/cpulist
/sys/devices/pci***/***/local_cpulist
Add a companion interface, smp_affinity_list to use cpu lists instead of
cpu maps. This conforms to other companion interfaces where both a map
and a list interface exists.
This required adding a bitmap_parselist_user() function in a manner
similar to the bitmap_parse_user() function.
[akpm@linux-foundation.org: make __bitmap_parselist() static]
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-05-25 07:13:12 +07:00
|
|
|
* bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf
|
|
|
|
* bitmap_parselist_user(buf, dst, nbits) Parse bitmap dst from user buf
|
2006-03-24 18:15:44 +07:00
|
|
|
* bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region
|
|
|
|
* bitmap_release_region(bitmap, pos, order) Free specified bit region
|
|
|
|
* bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region
|
2016-02-19 21:23:59 +07:00
|
|
|
* bitmap_from_u32array(dst, nbits, buf, nwords) *dst = *buf (nwords 32b words)
|
|
|
|
* bitmap_to_u32array(buf, nwords, src, nbits) *buf = *src (nwords 32b words)
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Also the following operations in asm/bitops.h apply to bitmaps.
|
|
|
|
*
|
|
|
|
* set_bit(bit, addr) *addr |= bit
|
|
|
|
* clear_bit(bit, addr) *addr &= ~bit
|
|
|
|
* change_bit(bit, addr) *addr ^= bit
|
|
|
|
* test_bit(bit, addr) Is bit set in *addr?
|
|
|
|
* test_and_set_bit(bit, addr) Set bit and return old value
|
|
|
|
* test_and_clear_bit(bit, addr) Clear bit and return old value
|
|
|
|
* test_and_change_bit(bit, addr) Change bit and return old value
|
|
|
|
* find_first_zero_bit(addr, nbits) Position first zero bit in *addr
|
|
|
|
* find_first_bit(addr, nbits) Position first set bit in *addr
|
|
|
|
* find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit
|
|
|
|
* find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The DECLARE_BITMAP(name,bits) macro, in linux/types.h, can be used
|
|
|
|
* to declare an array named 'name' of just enough unsigned longs to
|
|
|
|
* contain all bit positions from 0 to 'bits' - 1.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* lib/bitmap.c provides these functions:
|
|
|
|
*/
|
|
|
|
|
lib: bitmap: make nbits parameter of bitmap_empty unsigned
Many functions in lib/bitmap.c start with an expression such as lim =
bits/BITS_PER_LONG. Since bits has type (signed) int, and since gcc
cannot know that it is in fact non-negative, it generates worse code
than it could. These patches, mostly consisting of changing various
parameters to unsigned, give a slight overall code reduction:
add/remove: 1/1 grow/shrink: 8/16 up/down: 251/-414 (-163)
function old new delta
tick_device_uses_broadcast 335 425 +90
__irq_alloc_descs 498 554 +56
__bitmap_andnot 73 115 +42
__bitmap_and 70 101 +31
bitmap_weight - 11 +11
copy_hugetlb_page_range 752 762 +10
follow_hugetlb_page 846 854 +8
hugetlb_init 1415 1417 +2
hugetlb_nrpages_setup 130 131 +1
hugetlb_add_hstate 377 376 -1
bitmap_allocate_region 82 80 -2
select_task_rq_fair 2202 2191 -11
hweight_long 66 55 -11
__reg_op 230 219 -11
dm_stats_message 2849 2833 -16
bitmap_parselist 92 74 -18
__bitmap_weight 115 97 -18
__bitmap_subset 153 129 -24
__bitmap_full 128 104 -24
__bitmap_empty 120 96 -24
bitmap_set 179 149 -30
bitmap_clear 185 155 -30
__bitmap_equal 136 105 -31
__bitmap_intersects 148 108 -40
__bitmap_complement 109 67 -42
tick_device_setup_broadcast_func.isra 81 - -81
[The increases in __bitmap_and{,not} are due to bug fixes 17/18,18/18.
No idea why bitmap_weight suddenly appears.] While 163 bytes treewide is
insignificant, I believe the bitmap functions are often called with
locks held, so saving even a few cycles might be worth it.
While making these changes, I found a few other things that might be
worth including. 16,17,18 are actual bug fixes. The rest shouldn't
change the behaviour of any of the functions, provided no-one passed
negative nbits values. If something should come up, it should be fairly
bisectable.
A few issues I thought about, but didn't know what to do with:
* Many of the functions misbehave if nbits is compile-time 0; the
out-of-line functions generally handle 0 correctly. bitmap_fill() is
particularly bad, whether the 0 is known at compile time or not. It
would probably be nice to add detection of at least compile-time 0 and
handle that appropriately.
* I didn't change __bitmap_shift_{left,right} to use unsigned because I
want to fully understand why the algorithm works before making that
change. However, AFAICT, they behave correctly for all (positive) shift
amounts. This is not the case for the small_const_nbits versions. If
for example nbits = n = BITS_PER_LONG, the shift operators turn into
no-ops (at least on x86), so one gets *dst = *src, whereas one would
expect to get *dst=0. That difference in behaviour is somewhat
annoying.
This patch (of 18):
The compiler can generate slightly smaller and simpler code when it
knows that "nbits" is non-negative. Since no-one passes a negative
bit-count, this shouldn't affect the semantics.
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-08-07 06:09:49 +07:00
|
|
|
extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits);
|
2014-08-07 06:09:51 +07:00
|
|
|
extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits);
|
2005-04-17 05:20:36 +07:00
|
|
|
extern int __bitmap_equal(const unsigned long *bitmap1,
|
2014-08-07 06:09:53 +07:00
|
|
|
const unsigned long *bitmap2, unsigned int nbits);
|
2005-04-17 05:20:36 +07:00
|
|
|
extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
|
2014-08-07 06:09:55 +07:00
|
|
|
unsigned int nbits);
|
2015-02-14 05:36:02 +07:00
|
|
|
extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src,
|
|
|
|
unsigned int shift, unsigned int nbits);
|
2015-02-14 05:36:13 +07:00
|
|
|
extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src,
|
|
|
|
unsigned int shift, unsigned int nbits);
|
2009-08-21 23:26:15 +07:00
|
|
|
extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
|
2014-08-07 06:09:59 +07:00
|
|
|
const unsigned long *bitmap2, unsigned int nbits);
|
2005-04-17 05:20:36 +07:00
|
|
|
extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
|
2014-08-07 06:09:59 +07:00
|
|
|
const unsigned long *bitmap2, unsigned int nbits);
|
2005-04-17 05:20:36 +07:00
|
|
|
extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
|
2014-08-07 06:09:59 +07:00
|
|
|
const unsigned long *bitmap2, unsigned int nbits);
|
2009-08-21 23:26:15 +07:00
|
|
|
extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
|
2014-08-07 06:09:59 +07:00
|
|
|
const unsigned long *bitmap2, unsigned int nbits);
|
2005-04-17 05:20:36 +07:00
|
|
|
extern int __bitmap_intersects(const unsigned long *bitmap1,
|
2014-08-07 06:10:01 +07:00
|
|
|
const unsigned long *bitmap2, unsigned int nbits);
|
2005-04-17 05:20:36 +07:00
|
|
|
extern int __bitmap_subset(const unsigned long *bitmap1,
|
2014-08-07 06:10:03 +07:00
|
|
|
const unsigned long *bitmap2, unsigned int nbits);
|
2014-08-07 06:10:05 +07:00
|
|
|
extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2014-08-07 06:10:07 +07:00
|
|
|
extern void bitmap_set(unsigned long *map, unsigned int start, int len);
|
2014-08-07 06:10:10 +07:00
|
|
|
extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
|
2014-12-13 07:54:45 +07:00
|
|
|
|
|
|
|
extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
|
|
|
|
unsigned long size,
|
|
|
|
unsigned long start,
|
|
|
|
unsigned int nr,
|
|
|
|
unsigned long align_mask,
|
|
|
|
unsigned long align_offset);
|
|
|
|
|
|
|
|
/**
 * bitmap_find_next_zero_area - find a contiguous aligned zero area
 * @map: The address to base the search on
 * @size: The bitmap size in bits
 * @start: The bitnumber to start searching at
 * @nr: The number of zeroed bits we're looking for
 * @align_mask: Alignment mask for zero area
 *
 * @align_mask should be one less than a power of 2, so that the bit offset
 * of every zero area this function finds is a multiple of that power of 2.
 * An @align_mask of 0 means no alignment is required.
 *
 * This is a thin wrapper: it forwards all arguments to
 * bitmap_find_next_zero_area_off() with an alignment offset of 0 and
 * returns that function's result unchanged.
 */
static inline unsigned long
bitmap_find_next_zero_area(unsigned long *map,
			   unsigned long size,
			   unsigned long start,
			   unsigned int nr,
			   unsigned long align_mask)
{
	/* No extra offset is applied to the alignment in this variant. */
	const unsigned long align_offset = 0;

	return bitmap_find_next_zero_area_off(map, size, start, nr,
					      align_mask, align_offset);
}
|
2009-12-16 07:48:25 +07:00
|
|
|
|
2006-10-11 15:21:55 +07:00
|
|
|
extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user,
|
|
|
|
unsigned long *dst, int nbits);
|
|
|
|
extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen,
|
2005-04-17 05:20:36 +07:00
|
|
|
unsigned long *dst, int nbits);
|
|
|
|
extern int bitmap_parselist(const char *buf, unsigned long *maskp,
|
|
|
|
int nmaskbits);
|
bitmap, irq: add smp_affinity_list interface to /proc/irq
Manually adjusting the smp_affinity for IRQ's becomes unwieldy when the
cpu count is large.
Setting smp affinity to cpus 256 to 263 would be:
echo 000000ff,00000000,00000000,00000000,00000000,00000000,00000000,00000000 > smp_affinity
instead of:
echo 256-263 > smp_affinity_list
Think about what it looks like for cpus around say, 4088 to 4095.
We already have many alternate "list" interfaces:
/sys/devices/system/cpu/cpuX/indexY/shared_cpu_list
/sys/devices/system/cpu/cpuX/topology/thread_siblings_list
/sys/devices/system/cpu/cpuX/topology/core_siblings_list
/sys/devices/system/node/nodeX/cpulist
/sys/devices/pci***/***/local_cpulist
Add a companion interface, smp_affinity_list to use cpu lists instead of
cpu maps. This conforms to other companion interfaces where both a map
and a list interface exists.
This required adding a bitmap_parselist_user() function in a manner
similar to the bitmap_parse_user() function.
[akpm@linux-foundation.org: make __bitmap_parselist() static]
Signed-off-by: Mike Travis <travis@sgi.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jack Steiner <steiner@sgi.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-05-25 07:13:12 +07:00
|
|
|
extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
|
|
|
|
unsigned long *dst, int nbits);
|
2005-10-31 06:02:33 +07:00
|
|
|
extern void bitmap_remap(unsigned long *dst, const unsigned long *src,
|
2015-02-13 06:02:13 +07:00
|
|
|
const unsigned long *old, const unsigned long *new, unsigned int nbits);
|
2005-10-31 06:02:33 +07:00
|
|
|
extern int bitmap_bitremap(int oldbit,
|
|
|
|
const unsigned long *old, const unsigned long *new, int bits);
|
mempolicy: add bitmap_onto() and bitmap_fold() operations
The following adds two more bitmap operators, bitmap_onto() and bitmap_fold(),
with the usual cpumask and nodemask wrappers.
The bitmap_onto() operator computes one bitmap relative to another. If the
n-th bit in the origin mask is set, then the m-th bit of the destination mask
will be set, where m is the position of the n-th set bit in the relative mask.
The bitmap_fold() operator folds a bitmap into a second that has bit m set iff
the input bitmap has some bit n set, where m == n mod sz, for the specified sz
value.
There are two substantive changes between this patch and its
predecessor bitmap_relative:
1) Renamed bitmap_relative() to be bitmap_onto().
2) Added bitmap_fold().
The essential motivation for bitmap_onto() is to provide a mechanism for
converting a cpuset-relative CPU or Node mask to an absolute mask. Cpuset
relative masks are written as if the current task were in a cpuset whose CPUs
or Nodes were just the consecutive ones numbered 0..N-1, for some N. The
bitmap_onto() operator is provided in anticipation of adding support for the
first such cpuset relative mask, by the mbind() and set_mempolicy() system
calls, using a planned flag of MPOL_F_RELATIVE_NODES. These bitmap operators
(and their nodemask wrappers, in particular) will be used in code that
converts the user specified cpuset relative memory policy to a specific system
node numbered policy, given the current mems_allowed of the tasks cpuset.
Such cpuset relative mempolicies will address two deficiencies
of the existing interface between cpusets and mempolicies:
1) A task cannot at present reliably establish a cpuset
relative mempolicy because there is an essential race
condition, in that the tasks cpuset may be changed in
between the time the task can query its cpuset placement,
and the time the task can issue the applicable mbind or
set_mempolicy system call.
2) A task cannot at present establish what cpuset relative
mempolicy it would like to have, if it is in a smaller
cpuset than it might have mempolicy preferences for,
because the existing interface only allows specifying
mempolicies for nodes currently allowed by the cpuset.
Cpuset relative mempolicies are useful for tasks that don't distinguish
particularly between one CPU or Node and another, but only between how many of
each are allowed, and the proper placement of threads and memory pages on the
various CPUs and Nodes available.
The motivation for the added bitmap_fold() can be seen in the following
example.
Let's say an application has specified some mempolicies that presume 16 memory
nodes, including say a mempolicy that specified MPOL_F_RELATIVE_NODES (cpuset
relative) nodes 12-15. Then lets say that application is crammed into a
cpuset that only has 8 memory nodes, 0-7. If one just uses bitmap_onto(),
this mempolicy, mapped to that cpuset, would ignore the requested relative
nodes above 7, leaving it empty of nodes. That's not good; better to fold the
higher nodes down, so that some nodes are included in the resulting mapped
mempolicy. In this case, the mempolicy nodes 12-15 are taken modulo 8 (the
weight of the mems_allowed of the confining cpuset), resulting in a mempolicy
specifying nodes 4-7.
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: <kosaki.motohiro@jp.fujitsu.com>
Cc: <ray-lk@madrabbit.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 16:12:29 +07:00
|
|
|
extern void bitmap_onto(unsigned long *dst, const unsigned long *orig,
|
2015-02-13 06:02:01 +07:00
|
|
|
const unsigned long *relmap, unsigned int bits);
|
mempolicy: add bitmap_onto() and bitmap_fold() operations
The following adds two more bitmap operators, bitmap_onto() and bitmap_fold(),
with the usual cpumask and nodemask wrappers.
The bitmap_onto() operator computes one bitmap relative to another. If the
n-th bit in the origin mask is set, then the m-th bit of the destination mask
will be set, where m is the position of the n-th set bit in the relative mask.
The bitmap_fold() operator folds a bitmap into a second that has bit m set iff
the input bitmap has some bit n set, where m == n mod sz, for the specified sz
value.
There are two substantive changes between this patch and its
predecessor bitmap_relative:
1) Renamed bitmap_relative() to be bitmap_onto().
2) Added bitmap_fold().
The essential motivation for bitmap_onto() is to provide a mechanism for
converting a cpuset-relative CPU or Node mask to an absolute mask. Cpuset
relative masks are written as if the current task were in a cpuset whose CPUs
or Nodes were just the consecutive ones numbered 0..N-1, for some N. The
bitmap_onto() operator is provided in anticipation of adding support for the
first such cpuset relative mask, by the mbind() and set_mempolicy() system
calls, using a planned flag of MPOL_F_RELATIVE_NODES. These bitmap operators
(and their nodemask wrappers, in particular) will be used in code that
converts the user specified cpuset relative memory policy to a specific system
node numbered policy, given the current mems_allowed of the tasks cpuset.
Such cpuset relative mempolicies will address two deficiencies
of the existing interface between cpusets and mempolicies:
1) A task cannot at present reliably establish a cpuset
relative mempolicy because there is an essential race
condition, in that the tasks cpuset may be changed in
between the time the task can query its cpuset placement,
and the time the task can issue the applicable mbind or
set_mempolicy system call.
2) A task cannot at present establish what cpuset relative
mempolicy it would like to have, if it is in a smaller
cpuset than it might have mempolicy preferences for,
because the existing interface only allows specifying
mempolicies for nodes currently allowed by the cpuset.
Cpuset relative mempolicies are useful for tasks that don't distinguish
particularly between one CPU or Node and another, but only between how many of
each are allowed, and the proper placement of threads and memory pages on the
various CPUs and Nodes available.
The motivation for the added bitmap_fold() can be seen in the following
example.
Let's say an application has specified some mempolicies that presume 16 memory
nodes, including say a mempolicy that specified MPOL_F_RELATIVE_NODES (cpuset
relative) nodes 12-15. Then lets say that application is crammed into a
cpuset that only has 8 memory nodes, 0-7. If one just uses bitmap_onto(),
this mempolicy, mapped to that cpuset, would ignore the requested relative
nodes above 7, leaving it empty of nodes. That's not good; better to fold the
higher nodes down, so that some nodes are included in the resulting mapped
mempolicy. In this case, the mempolicy nodes 12-15 are taken modulo 8 (the
weight of the mems_allowed of the confining cpuset), resulting in a mempolicy
specifying nodes 4-7.
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: <kosaki.motohiro@jp.fujitsu.com>
Cc: <ray-lk@madrabbit.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 16:12:29 +07:00
|
|
|
extern void bitmap_fold(unsigned long *dst, const unsigned long *orig,
|
2015-02-13 06:02:04 +07:00
|
|
|
unsigned int sz, unsigned int nbits);
|
2014-08-07 06:10:16 +07:00
|
|
|
extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
|
|
|
|
extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
|
|
|
|
extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
|
2016-02-19 21:23:59 +07:00
|
|
|
extern unsigned int bitmap_from_u32array(unsigned long *bitmap,
|
|
|
|
unsigned int nbits,
|
|
|
|
const u32 *buf,
|
|
|
|
unsigned int nwords);
|
|
|
|
extern unsigned int bitmap_to_u32array(u32 *buf,
|
|
|
|
unsigned int nwords,
|
|
|
|
const unsigned long *bitmap,
|
|
|
|
unsigned int nbits);
|
2015-02-14 05:36:00 +07:00
|
|
|
/*
 * bitmap_copy_le(dst, src, nbits)
 *
 * When __BIG_ENDIAN is not defined this is simply another name for
 * bitmap_copy().  When __BIG_ENDIAN is defined it is a real function that
 * is only declared here; its definition lives outside this header.
 */
#ifndef __BIG_ENDIAN
#define bitmap_copy_le bitmap_copy
#else
extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src,
			   unsigned int nbits);
#endif
|
2015-02-13 06:02:10 +07:00
|
|
|
extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits);
|
2014-09-30 20:48:22 +07:00
|
|
|
extern int bitmap_print_to_pagebuf(bool list, char *buf,
|
|
|
|
const unsigned long *maskp, int nmaskbits);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
linux/bitmap.h: improve BITMAP_{LAST,FIRST}_WORD_MASK
The macro BITMAP_LAST_WORD_MASK can be implemented without a conditional,
which will generally lead to slightly better generated code (221 bytes
saved for allmodconfig-GCOV_KERNEL, ~2k with GCOV_KERNEL). As a small
bonus, this also ensures that the nbits parameter is expanded exactly
once.
In BITMAP_FIRST_WORD_MASK, if start is signed gcc is technically allowed
to assume it is positive (or divisible by BITS_PER_LONG), and hence just
do the simple mask. It doesn't seem to use this, and even on an
architecture like x86 where the shift only depends on the lower 5 or 6
bits, and these bits are not affected by the signedness of the expression,
gcc still generates code to compute the C99 mandated value of start %
BITS_PER_LONG. So just use a mask explicitly, also for consistency with
BITMAP_LAST_WORD_MASK.
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tejun Heo <tj@kernel.org>
Reviewed-by: George Spelvin <linux@horizon.com>
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-04-16 06:17:42 +07:00
|
|
|
/*
 * BITMAP_FIRST_WORD_MASK(start) - mask for the word containing bit @start.
 *
 * An all-ones word is shifted left by (start & (BITS_PER_LONG - 1)), so the
 * bits below that in-word position are clear and the bits from it upward
 * are set.  The shift count is derived with an explicit mask rather than a
 * modulo operation.
 */
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
|
|
|
|
/*
 * BITMAP_LAST_WORD_MASK(nbits) - mask for the last word of an @nbits bitmap.
 *
 * An all-ones word is shifted right by (-(nbits) & (BITS_PER_LONG - 1)).
 * When that masked count is 0 the result is all ones; otherwise the
 * topmost bits of the word are cleared.  There is no conditional, and
 * @nbits appears exactly once in the expansion.
 * NOTE(review): the "low nbits % BITS_PER_LONG bits set" reading assumes
 * BITS_PER_LONG is a power of two - confirm against its definition.
 */
#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2008-12-30 05:35:13 +07:00
|
|
|
/*
 * small_const_nbits(nbits) - non-zero when @nbits is a compile-time constant
 * (as reported by __builtin_constant_p()) that does not exceed
 * BITS_PER_LONG.  The inline bitmap helpers in this header test it to choose
 * between a single-word fast path and the general multi-word path.
 *
 * Note that a constant @nbits of 0 also satisfies this test.
 */
#define small_const_nbits(nbits)					\
	(__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG)
|
|
|
|
|
2015-02-13 06:01:53 +07:00
|
|
|
/**
 * bitmap_zero - clear a bitmap
 * @dst: bitmap to clear
 * @nbits: number of bits in @dst
 *
 * For a small compile-time constant @nbits the first word of @dst is simply
 * set to 0UL.  Otherwise BITS_TO_LONGS(@nbits) whole longs are cleared with
 * memset().
 */
static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
{
	unsigned int nbytes;

	/* Single-word fast path. */
	if (small_const_nbits(nbits)) {
		*dst = 0UL;
		return;
	}

	nbytes = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
	memset(dst, 0, nbytes);
}
|
|
|
|
|
2015-02-13 06:01:53 +07:00
|
|
|
/**
 * bitmap_fill - set every bit of a bitmap
 * @dst: bitmap to fill
 * @nbits: number of bits in @dst
 *
 * All words before the last one are set to all-ones with memset() (skipped
 * when @nbits is a small compile-time constant, since there is then only
 * one word).  The last word is written as BITMAP_LAST_WORD_MASK(@nbits).
 *
 * A zero-sized bitmap is left untouched.
 */
static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
{
	unsigned int nlongs = BITS_TO_LONGS(nbits);

	/*
	 * Nothing to fill.  Without this guard an @nbits of 0 (presumably
	 * giving nlongs == 0) makes the unsigned "nlongs - 1" below wrap
	 * around, so the memset() length and the dst[] index would both
	 * reach far outside the bitmap.
	 */
	if (!nbits)
		return;

	if (!small_const_nbits(nbits)) {
		/* Every word except the last is completely set. */
		unsigned int len = (nlongs - 1) * sizeof(unsigned long);

		memset(dst, 0xff, len);
	}

	/* The last word gets the mask computed from @nbits. */
	dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits);
}
|
|
|
|
|
|
|
|
/**
 * bitmap_copy - copy one bitmap to another
 * @dst: destination bitmap
 * @src: source bitmap
 * @nbits: number of bits in each bitmap
 *
 * For a small compile-time constant @nbits a single word is assigned.
 * Otherwise BITS_TO_LONGS(@nbits) whole longs are copied with memcpy().
 */
static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
			unsigned int nbits)
{
	unsigned int nbytes;

	/* Single-word fast path. */
	if (small_const_nbits(nbits)) {
		*dst = *src;
		return;
	}

	nbytes = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
	memcpy(dst, src, nbytes);
}
|
|
|
|
|
2009-08-21 23:26:15 +07:00
|
|
|
/**
 * bitmap_and - *dst = *src1 & *src2
 * @dst: destination bitmap
 * @src1: first operand
 * @src2: second operand
 * @nbits: number of bits in each bitmap
 *
 * For a small compile-time constant @nbits the AND is done on a single
 * word, masked with BITMAP_LAST_WORD_MASK(@nbits), and the return value is
 * 1 if that stored word is non-zero and 0 otherwise.  For any other @nbits
 * the work and the return value are delegated to __bitmap_and().
 */
static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
			const unsigned long *src2, unsigned int nbits)
{
	unsigned long word;

	if (!small_const_nbits(nbits))
		return __bitmap_and(dst, src1, src2, nbits);

	word = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits);
	*dst = word;
	return word != 0;
}
|
|
|
|
|
|
|
|
/**
 * bitmap_or - *dst = *src1 | *src2
 * @dst: destination bitmap
 * @src1: first operand
 * @src2: second operand
 * @nbits: number of bits in each bitmap
 *
 * For a small compile-time constant @nbits a single word is ORed and
 * stored as is (no last-word mask is applied here).  Otherwise the
 * operation is delegated to __bitmap_or().
 */
static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
			const unsigned long *src2, unsigned int nbits)
{
	if (!small_const_nbits(nbits)) {
		__bitmap_or(dst, src1, src2, nbits);
		return;
	}

	/* Single-word fast path. */
	*dst = *src1 | *src2;
}
|
|
|
|
|
|
|
|
/**
 * bitmap_xor - bitwise exclusive OR of two bitmaps
 * @dst: result bitmap
 * @src1: first operand
 * @src2: second operand
 * @nbits: number of bits in each bitmap
 *
 * The single-word path XORs the two complete words without applying a
 * last-word mask. Larger bitmaps are handed to __bitmap_xor().
 */
static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
			const unsigned long *src2, unsigned int nbits)
{
	if (!small_const_nbits(nbits)) {
		__bitmap_xor(dst, src1, src2, nbits);
		return;
	}

	*dst = *src1 ^ *src2;
}
|
|
|
|
|
2009-08-21 23:26:15 +07:00
|
|
|
/**
 * bitmap_andnot - clear the bits of one bitmap in another
 * @dst: result bitmap
 * @src1: bitmap to take bits from
 * @src2: bitmap whose set bits are removed from @src1
 * @nbits: number of bits in each bitmap
 *
 * On the single-word path the result *src1 & ~*src2 is masked with
 * BITMAP_LAST_WORD_MASK(@nbits) before being stored. Larger bitmaps are
 * handed to __bitmap_andnot().
 *
 * Return: on the single-word path, 1 if the stored word is non-zero and
 * 0 otherwise; on the multi-word path, whatever __bitmap_andnot() returns.
 */
static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
			const unsigned long *src2, unsigned int nbits)
{
	unsigned long word;

	if (!small_const_nbits(nbits))
		return __bitmap_andnot(dst, src1, src2, nbits);

	word = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits);
	*dst = word;

	return word != 0;
}
|
|
|
|
|
|
|
|
/**
 * bitmap_complement - invert the bits of a bitmap
 * @dst: result bitmap
 * @src: bitmap to invert
 * @nbits: number of bits in each bitmap
 *
 * The single-word path inverts the complete word; no last-word mask is
 * applied here. Larger bitmaps are handed to __bitmap_complement().
 */
static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
			unsigned int nbits)
{
	if (!small_const_nbits(nbits)) {
		__bitmap_complement(dst, src, nbits);
		return;
	}

	*dst = ~(*src);
}
|
|
|
|
|
|
|
|
/**
 * bitmap_equal - test whether two bitmaps are equal
 * @src1: first bitmap
 * @src2: second bitmap
 * @nbits: number of bits to compare
 *
 * Return: non-zero if the bitmaps are equal, 0 otherwise. On the
 * single-word path only the bits selected by
 * BITMAP_LAST_WORD_MASK(@nbits) take part in the comparison.
 */
static inline int bitmap_equal(const unsigned long *src1,
			const unsigned long *src2, unsigned int nbits)
{
	if (small_const_nbits(nbits)) {
		unsigned long diff = *src1 ^ *src2;

		return (diff & BITMAP_LAST_WORD_MASK(nbits)) == 0;
	}
#ifdef CONFIG_S390
	/* A compile-time bit count covering only whole words: use memcmp(). */
	if (__builtin_constant_p(nbits) && (nbits % BITS_PER_LONG) == 0)
		return memcmp(src1, src2, nbits / 8) == 0;
#endif
	return __bitmap_equal(src1, src2, nbits);
}
|
|
|
|
|
|
|
|
/**
 * bitmap_intersects - test whether two bitmaps overlap
 * @src1: first bitmap
 * @src2: second bitmap
 * @nbits: number of bits to examine
 *
 * Return: on the single-word path, 1 if any bit selected by
 * BITMAP_LAST_WORD_MASK(@nbits) is set in both words and 0 otherwise;
 * for larger bitmaps, the result of __bitmap_intersects().
 */
static inline int bitmap_intersects(const unsigned long *src1,
			const unsigned long *src2, unsigned int nbits)
{
	unsigned long common;

	if (!small_const_nbits(nbits))
		return __bitmap_intersects(src1, src2, nbits);

	common = *src1 & *src2;

	return (common & BITMAP_LAST_WORD_MASK(nbits)) != 0;
}
|
|
|
|
|
|
|
|
/**
 * bitmap_subset - test whether one bitmap is a subset of another
 * @src1: candidate subset
 * @src2: candidate superset
 * @nbits: number of bits to examine
 *
 * Return: on the single-word path, 1 if no bit selected by
 * BITMAP_LAST_WORD_MASK(@nbits) is set in *@src1 but clear in *@src2,
 * and 0 otherwise; for larger bitmaps, the result of __bitmap_subset().
 */
static inline int bitmap_subset(const unsigned long *src1,
			const unsigned long *src2, unsigned int nbits)
{
	unsigned long extra;

	if (!small_const_nbits(nbits))
		return __bitmap_subset(src1, src2, nbits);

	/* Bits present in *src1 that are missing from *src2. */
	extra = *src1 & ~(*src2);

	return (extra & BITMAP_LAST_WORD_MASK(nbits)) == 0;
}
|
|
|
|
|
lib: bitmap: make nbits parameter of bitmap_empty unsigned
Many functions in lib/bitmap.c start with an expression such as lim =
bits/BITS_PER_LONG. Since bits has type (signed) int, and since gcc
cannot know that it is in fact non-negative, it generates worse code
than it could. These patches, mostly consisting of changing various
parameters to unsigned, give a slight overall code reduction:
add/remove: 1/1 grow/shrink: 8/16 up/down: 251/-414 (-163)
function old new delta
tick_device_uses_broadcast 335 425 +90
__irq_alloc_descs 498 554 +56
__bitmap_andnot 73 115 +42
__bitmap_and 70 101 +31
bitmap_weight - 11 +11
copy_hugetlb_page_range 752 762 +10
follow_hugetlb_page 846 854 +8
hugetlb_init 1415 1417 +2
hugetlb_nrpages_setup 130 131 +1
hugetlb_add_hstate 377 376 -1
bitmap_allocate_region 82 80 -2
select_task_rq_fair 2202 2191 -11
hweight_long 66 55 -11
__reg_op 230 219 -11
dm_stats_message 2849 2833 -16
bitmap_parselist 92 74 -18
__bitmap_weight 115 97 -18
__bitmap_subset 153 129 -24
__bitmap_full 128 104 -24
__bitmap_empty 120 96 -24
bitmap_set 179 149 -30
bitmap_clear 185 155 -30
__bitmap_equal 136 105 -31
__bitmap_intersects 148 108 -40
__bitmap_complement 109 67 -42
tick_device_setup_broadcast_func.isra 81 - -81
[The increases in __bitmap_and{,not} are due to bug fixes 17/18,18/18.
No idea why bitmap_weight suddenly appears.] While 163 bytes treewide is
insignificant, I believe the bitmap functions are often called with
locks held, so saving even a few cycles might be worth it.
While making these changes, I found a few other things that might be
worth including. 16,17,18 are actual bug fixes. The rest shouldn't
change the behaviour of any of the functions, provided no-one passed
negative nbits values. If something should come up, it should be fairly
bisectable.
A few issues I thought about, but didn't know what to do with:
* Many of the functions misbehave if nbits is compile-time 0; the
out-of-line functions generally handle 0 correctly. bitmap_fill() is
particularly bad, whether the 0 is known at compile time or not. It
would probably be nice to add detection of at least compile-time 0 and
handle that appropriately.
* I didn't change __bitmap_shift_{left,right} to use unsigned because I
want to fully understand why the algorithm works before making that
change. However, AFAICT, they behave correctly for all (positive) shift
amounts. This is not the case for the small_const_nbits versions. If
for example nbits = n = BITS_PER_LONG, the shift operators turn into
no-ops (at least on x86), so one gets *dst = *src, whereas one would
expect to get *dst=0. That difference in behaviour is somewhat
annoying.
This patch (of 18):
The compiler can generate slightly smaller and simpler code when it
knows that "nbits" is non-negative. Since no-one passes a negative
bit-count, this shouldn't affect the semantics.
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-08-07 06:09:49 +07:00
|
|
|
/**
 * bitmap_empty - test whether a bitmap has no bits set
 * @src: bitmap to examine
 * @nbits: number of bits to examine
 *
 * Return: non-zero if the bitmap is empty, 0 otherwise. The single-word
 * path tests the word against BITMAP_LAST_WORD_MASK(@nbits); larger
 * bitmaps are empty when find_first_bit() returns @nbits.
 */
static inline int bitmap_empty(const unsigned long *src, unsigned nbits)
{
	if (!small_const_nbits(nbits))
		return find_first_bit(src, nbits) == nbits;

	return (*src & BITMAP_LAST_WORD_MASK(nbits)) == 0;
}
|
|
|
|
|
2014-08-07 06:09:51 +07:00
|
|
|
/**
 * bitmap_full - test whether a bitmap has every bit set
 * @src: bitmap to examine
 * @nbits: number of bits to examine
 *
 * Return: non-zero if the bitmap is full, 0 otherwise. The single-word
 * path tests the inverted word against BITMAP_LAST_WORD_MASK(@nbits);
 * larger bitmaps are full when find_first_zero_bit() returns @nbits.
 */
static inline int bitmap_full(const unsigned long *src, unsigned int nbits)
{
	if (!small_const_nbits(nbits))
		return find_first_zero_bit(src, nbits) == nbits;

	return (~(*src) & BITMAP_LAST_WORD_MASK(nbits)) == 0;
}
|
|
|
|
|
2015-08-04 21:15:14 +07:00
|
|
|
/**
 * bitmap_weight - Hamming weight of a bitmap
 * @src: bitmap to examine
 * @nbits: number of bits to examine
 *
 * Return: on the single-word path, hweight_long() of the word masked
 * with BITMAP_LAST_WORD_MASK(@nbits); for larger bitmaps, the result of
 * __bitmap_weight().
 */
static __always_inline int bitmap_weight(const unsigned long *src, unsigned int nbits)
{
	if (!small_const_nbits(nbits))
		return __bitmap_weight(src, nbits);

	return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
}
|
|
|
|
|
2015-02-14 05:36:02 +07:00
|
|
|
/**
 * bitmap_shift_right - shift a bitmap towards bit 0
 * @dst: result bitmap
 * @src: bitmap to shift
 * @shift: number of bit positions to shift by
 * @nbits: number of bits in each bitmap
 *
 * On the single-word path the source word is masked with
 * BITMAP_LAST_WORD_MASK(@nbits) and then shifted right. Larger bitmaps
 * are handed to __bitmap_shift_right().
 *
 * A @shift of @nbits or more stores 0 on the single-word path: every
 * valid bit has been shifted out.
 */
static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src,
				unsigned int shift, int nbits)
{
	if (!small_const_nbits(nbits)) {
		__bitmap_shift_right(dst, src, shift, nbits);
		return;
	}

	/*
	 * Shifting an unsigned long by its width or more is undefined
	 * behaviour in C, so handle large shifts explicitly instead of
	 * passing them to ">>".  This assumes small_const_nbits() limits
	 * nbits to at most one word -- TODO confirm against its definition.
	 */
	if (shift >= (unsigned int)nbits)
		*dst = 0;
	else
		*dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift;
}
|
|
|
|
|
2015-02-14 05:36:13 +07:00
|
|
|
/**
 * bitmap_shift_left - shift a bitmap away from bit 0
 * @dst: result bitmap
 * @src: bitmap to shift
 * @shift: number of bit positions to shift by
 * @nbits: number of bits in each bitmap
 *
 * On the single-word path the source word is shifted left and the result
 * masked with BITMAP_LAST_WORD_MASK(@nbits). Larger bitmaps are handed
 * to __bitmap_shift_left().
 *
 * A @shift of @nbits or more stores 0 on the single-word path: every
 * valid bit has been shifted out.
 */
static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src,
				unsigned int shift, unsigned int nbits)
{
	if (!small_const_nbits(nbits)) {
		__bitmap_shift_left(dst, src, shift, nbits);
		return;
	}

	/*
	 * Shifting an unsigned long by its width or more is undefined
	 * behaviour in C, so handle large shifts explicitly instead of
	 * passing them to "<<".  This assumes small_const_nbits() limits
	 * nbits to at most one word -- TODO confirm against its definition.
	 */
	if (shift >= nbits)
		*dst = 0;
	else
		*dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits);
}
|
|
|
|
|
2006-10-11 15:21:55 +07:00
|
|
|
/**
 * bitmap_parse - parse a buffer into a bitmap
 * @buf: buffer holding the text to parse
 * @buflen: length of @buf
 * @maskp: bitmap that receives the result
 * @nmaskbits: number of bits in @maskp
 *
 * Thin wrapper that forwards to __bitmap_parse() with 0 as its third
 * argument.
 * NOTE(review): that argument presumably tells the parser the buffer is
 * not a user-space pointer -- confirm against __bitmap_parse().
 *
 * Return: the value returned by __bitmap_parse().
 */
static inline int bitmap_parse(const char *buf, unsigned int buflen,
			unsigned long *maskp, int nmaskbits)
{
	int ret = __bitmap_parse(buf, buflen, 0, maskp, nmaskbits);

	return ret;
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
|
|
|
|
#endif /* __LINUX_BITMAP_H */
|