kmod/libkmod/libkmod-util.h
Lucas De Marchi 3ba7f59e84 util: Add ALIGN_POWER2
Add a static inline function to align a value to its next power of 2.
This is commonly done by a SWAR like the one in:

http://aggregate.org/MAGIC/#Next Largest Power of 2

However, a microbenchmark shows that the implementation here is faster.
It doesn't really impact the possible user of this function, but it's
interesting nonetheless.

Using an x86_64 i7 Ivy Bridge it shows a ~4% advantage by using clz
instead of the OR and SHL chain. And this is by using a BSR
since Ivy Bridge doesn't have LZCNT. New Haswell processors have the
LZCNT instruction which can make this even better. ARM also has a CLZ
instruction so it should be better, too.

Code used to test:

	...
	v = val[i];
	t1 = get_cycles(0);
	a = ALIGN_POWER2(v);
	t1 = get_cycles(t1);

	t2 = get_cycles(0);
	v = nlpo2(v);
	t2 = get_cycles(t2);

	printf("%u\t%llu\t%llu\t%d\n", v, t1, t2, v == a);
	...

Here val is an array of 20 random unsigned ints, nlpo2 is the SWAR
implementation and get_cycles uses RDTSC to measure the performance.

Averages:
	ALIGN_POWER2: 	30 cycles
	nlpo2:		31.4 cycles
2013-09-20 01:08:46 -05:00

59 lines
2.1 KiB
C

#pragma once
#include "macro.h"
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
/*
 * getline_wrapped - presumably reads one logical line from fp, joining
 * continuation ("wrapped") lines; the definition is not part of this header,
 * so confirm against the implementation.
 * @fp: stream to read from; declared nonnull.
 * @linenum: pointer to a line counter; it is not covered by the nonnull
 *           attribute, so it is presumably optional (TODO confirm).
 * Returns a char pointer. Ownership of the returned buffer is not visible
 * from this declaration (TODO confirm whether the caller must free it).
 */
char *getline_wrapped(FILE *fp, unsigned int *linenum) __attribute__((nonnull(1)));
/* Evaluates to 1 when the NUL-terminated strings a and b are equal, else 0. */
#define streq(a, b) (!strcmp((a), (b)))
/*
 * Evaluates to 1 when string a begins with string b, else 0.
 * Note that b is expanded twice, so it must be free of side effects.
 */
#define strstartswith(a, b) (!strncmp((a), (b), strlen(b)))
/*
 * Memory and string I/O helpers. Only the declarations are visible here, so
 * the behavioural notes below are assumptions to confirm against the
 * definitions. _must_check_ comes from "macro.h" (presumably it maps to
 * warn_unused_result).
 */

/* memdup: presumably returns a newly allocated copy of the n bytes at p (TODO confirm). p is declared nonnull. */
void *memdup(const void *p, size_t n) __attribute__((nonnull(1)));
/* read_str_safe: presumably reads a string from fd into buf, with buflen being the capacity of buf (TODO confirm). buf is declared nonnull. */
ssize_t read_str_safe(int fd, char *buf, size_t buflen) _must_check_ __attribute__((nonnull(2)));
/* write_str_safe: presumably writes buflen bytes of buf to fd (TODO confirm). buf is declared nonnull. */
ssize_t write_str_safe(int fd, const char *buf, size_t buflen) __attribute__((nonnull(2)));
/* read_str_long: presumably reads text from fd and parses it as a long in the given base, storing it in *value (TODO confirm). value is declared nonnull. */
int read_str_long(int fd, long *value, int base) _must_check_ __attribute__((nonnull(2)));
/* read_str_ulong: unsigned long counterpart of read_str_long. value is declared nonnull. */
int read_str_ulong(int fd, unsigned long *value, int base) _must_check_ __attribute__((nonnull(2)));
/* strchr_replace: presumably replaces occurrences of c in s with r and returns s (TODO confirm). No nonnull attribute is declared. */
char *strchr_replace(char *s, int c, char r);
/*
 * Path and module-name helpers. Only the declarations are visible here, so
 * the behavioural notes below are assumptions to confirm against the
 * definitions.
 *
 * For the functions taking "char buf[PATH_MAX]": an array parameter is
 * adjusted to a plain pointer by the language, so PATH_MAX documents the
 * expected buffer size but is not enforced by the compiler.
 */

/* path_is_absolute: presumably tells whether p is an absolute path (TODO confirm). p is declared nonnull. */
bool path_is_absolute(const char *p) _must_check_ __attribute__((nonnull(1)));
/* path_make_absolute_cwd: presumably resolves p against the current working directory; ownership of the result is not visible here (TODO confirm). p is declared nonnull. */
char *path_make_absolute_cwd(const char *p) _must_check_ __attribute__((nonnull(1)));
/* mkdir_p: presumably creates the directory given by the first len bytes of path, including missing ancestors, with the given mode (TODO confirm). */
int mkdir_p(const char *path, int len, mode_t mode);
/* mkdir_parents: presumably creates the parent directories of path with the given mode (TODO confirm). */
int mkdir_parents(const char *path, mode_t mode);
/* alias_normalize: presumably writes a normalized form of alias into buf and its length into *len (TODO confirm). alias and buf are declared nonnull; len is not. */
int alias_normalize(const char *alias, char buf[PATH_MAX], size_t *len) _must_check_ __attribute__((nonnull(1,2)));
/* modname_normalize: presumably writes a normalized form of modname into buf and its length into *len (TODO confirm). modname and buf are declared nonnull; len is not. */
char *modname_normalize(const char *modname, char buf[PATH_MAX], size_t *len) __attribute__((nonnull(1, 2)));
/* path_to_modname: presumably derives a module name from path into buf (TODO confirm). Only buf is declared nonnull; path and len are not. */
char *path_to_modname(const char *path, char buf[PATH_MAX], size_t *len) __attribute__((nonnull(2)));
/*
 * Table of extension strings, defined in another translation unit (the array
 * is declared here with unspecified size). Each entry pairs an extension
 * string with a length field, presumably strlen(ext) (TODO confirm).
 */
extern const struct kmod_ext {
/* extension string */
const char *ext;
/* length associated with ext */
size_t len;
} kmod_exts[];
/* Presumably the index in kmod_exts of the uncompressed-module extension (TODO confirm against the table definition). */
#define KMOD_EXT_UNC 0
/* path_ends_with_kmod_ext: presumably tells whether path (of length len) ends with one of the kmod_exts entries (TODO confirm). path is declared nonnull. */
bool path_ends_with_kmod_ext(const char *path, size_t len) __attribute__((nonnull(1)));
/* stat_mstamp: presumably returns the modification timestamp of st as a single integer; the unit is not visible here (TODO confirm). */
unsigned long long stat_mstamp(const struct stat *st);
/* ts_usec: presumably converts ts to microseconds (TODO confirm). */
unsigned long long ts_usec(const struct timespec *ts);
/*
 * get_unaligned - read the object that ptr points to without requiring ptr to
 * be suitably aligned for its type.
 * @ptr: pointer to the (possibly misaligned) object; expanded once in an
 *       evaluated context, the other use is an unevaluated __typeof__ operand.
 *
 * The value is read through a packed single-member struct so the compiler
 * does not assume natural alignment for the access. Evaluates to the value
 * read, with the type of *ptr.
 *
 * __typeof__ is used instead of typeof so the macro also expands correctly
 * when the including file is built in a strict ISO mode (-std=c99/-std=c11),
 * where typeof is not a keyword. Statement expressions and the packed
 * attribute remain GNU extensions.
 */
#define get_unaligned(ptr) \
({ \
	struct __attribute__((packed)) { \
		__typeof__(*(ptr)) __v; \
	} *__p = (__typeof__(__p)) (ptr); \
	__p->__v; \
})
/*
 * put_unaligned - store val into the object that ptr points to without
 * requiring ptr to be suitably aligned for its type.
 * @val: value to store; converted to the type of *ptr by the assignment.
 * @ptr: pointer to the (possibly misaligned) destination; expanded once in an
 *       evaluated context, the other use is an unevaluated __typeof__ operand.
 *
 * The store goes through a packed single-member struct so the compiler does
 * not assume natural alignment for the access. Wrapped in do { } while (0)
 * so it behaves as a single statement.
 *
 * __typeof__ is used instead of typeof so the macro also expands correctly
 * when the including file is built in a strict ISO mode (-std=c99/-std=c11),
 * where typeof is not a keyword.
 */
#define put_unaligned(val, ptr) \
do { \
	struct __attribute__((packed)) { \
		__typeof__(*(ptr)) __v; \
	} *__p = (__typeof__(__p)) (ptr); \
	__p->__v = (val); \
} while (0)
/*
 * ALIGN_POWER2 - round u up to the nearest power of 2.
 * @u: value to round up.
 *
 * Returns the smallest power of 2 that is >= u. Values that are already a
 * power of 2 are returned unchanged. 0 and 1 both yield 1. If the result does
 * not fit in an unsigned int (u > 2^(N-1) for an N-bit unsigned int), 0 is
 * returned.
 */
static _always_inline_ unsigned int ALIGN_POWER2(unsigned int u)
{
	const unsigned int bits = sizeof(u) * CHAR_BIT;
	unsigned int shift;

	/*
	 * __builtin_clz(0) is undefined, which is what u == 1 would evaluate;
	 * u == 0 would wrap to UINT_MAX and request a full-width shift.
	 */
	if (u <= 1)
		return 1;

	shift = bits - __builtin_clz(u - 1);

	/* Shifting by the full width of the type is undefined: report overflow. */
	if (shift >= bits)
		return 0;

	/* Unsigned literal: 1 << (bits - 1) would overflow a signed int. */
	return 1U << shift;
}