mirror of
https://github.com/AuxXxilium/kmod.git
synced 2025-01-14 07:56:14 +07:00
3ba7f59e84
Add static inline function to align a value to its next power of 2. This is commonly done by a SWAR like the one in: http://aggregate.org/MAGIC/#Next Largest Power of 2 However a microbenchmark shows that the implementation here is faster. It doesn't really impact the possible users of this function, but it's interesting nonetheless. Using an x86_64 i7 Ivy Bridge it shows a ~4% advantage by using clz instead of the OR and SHL chain. And this is by using a BSR since Ivy Bridge doesn't have LZCNT. New Haswell processors have the LZCNT instruction which can make this even better. ARM also has a CLZ instruction so it should be better, too. Code used to test: ... v = val[i]; t1 = get_cycles(0); a = ALIGN_POWER2(v); t1 = get_cycles(t1); t2 = get_cycles(0); v = nlpo2(v); t2 = get_cycles(t2); printf("%u\t%llu\t%llu\t%d\n", v, t1, t2, v == a); ... In which val is an array of 20 random unsigned int, nlpo2 is the SWAR implementation and get_cycles uses RDTSC to measure the performance. Averages: ALIGN_POWER2: 30 cycles nlpo2: 31.4 cycles
59 lines
2.1 KiB
C
59 lines
2.1 KiB
C
#pragma once
|
|
#include "macro.h"
|
|
|
|
#include <limits.h>
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
|
|
|
|
/*
 * Takes a stdio stream @fp (declared nonnull) and a line-counter pointer
 * @linenum (not declared nonnull) and returns a char *.
 * NOTE(review): presumably reads one logical line, joining continuation
 * lines, updates *@linenum and returns a heap buffer owned by the caller -
 * confirm against the definition.
 */
char *getline_wrapped(FILE *fp, unsigned int *linenum) __attribute__((nonnull(1)));
|
|
/* Non-zero when strcmp() reports the C strings @a and @b as equal, 0 otherwise. */
#define streq(a, b) (!strcmp((a), (b)))
|
|
/*
 * Non-zero when the C string @a begins with the C string @b (every string
 * starts with the empty string). @b is evaluated twice, so it must not have
 * side effects.
 */
#define strstartswith(a, b) (strncmp((a), (b), strlen(b)) == 0)
|
|
/*
 * Takes a read-only buffer @p (declared nonnull) and a byte count @n and
 * returns a void *.
 * NOTE(review): presumably returns a newly allocated copy of the first @n
 * bytes of @p (NULL on allocation failure) that the caller frees - confirm.
 */
void *memdup(const void *p, size_t n) __attribute__((nonnull(1)));
|
|
|
|
/*
 * Takes a file descriptor @fd, a destination buffer @buf (declared nonnull)
 * and its size @buflen; returns ssize_t and is marked _must_check_.
 * NOTE(review): presumably reads a NUL-terminated string from @fd and returns
 * its length or a negative error code - confirm against the definition.
 */
ssize_t read_str_safe(int fd, char *buf, size_t buflen) _must_check_ __attribute__((nonnull(2)));
|
|
/*
 * Takes a file descriptor @fd, a read-only buffer @buf (declared nonnull) and
 * a length @buflen; returns ssize_t. Unlike read_str_safe() it is not marked
 * _must_check_.
 * NOTE(review): presumably writes @buflen bytes of @buf to @fd and returns the
 * count written or a negative error code - confirm against the definition.
 */
ssize_t write_str_safe(int fd, const char *buf, size_t buflen) __attribute__((nonnull(2)));
|
|
/*
 * Takes a file descriptor @fd, an output pointer @value (declared nonnull)
 * and a numeric @base; returns int and is marked _must_check_.
 * NOTE(review): presumably parses text read from @fd as a long in @base into
 * *@value, returning 0 or a negative error code - confirm.
 */
int read_str_long(int fd, long *value, int base) _must_check_ __attribute__((nonnull(2)));
|
|
/*
 * Unsigned counterpart of read_str_long(): same parameters, but @value
 * (declared nonnull) points to an unsigned long. Marked _must_check_.
 * NOTE(review): return convention presumably 0 / negative error code -
 * confirm against the definition.
 */
int read_str_ulong(int fd, unsigned long *value, int base) _must_check_ __attribute__((nonnull(2)));
|
|
/*
 * Takes a mutable string @s, a character @c and a replacement @r; returns
 * char *. No nonnull attribute is declared.
 * NOTE(review): presumably replaces occurrences of @c in @s with @r in place
 * and returns @s - confirm against the definition.
 */
char *strchr_replace(char *s, int c, char r);
|
|
/*
 * Boolean predicate on the path string @p (declared nonnull); marked
 * _must_check_.
 * NOTE(review): presumably true when @p starts with '/' - confirm.
 */
bool path_is_absolute(const char *p) _must_check_ __attribute__((nonnull(1)));
|
|
/*
 * Takes a path string @p (declared nonnull) and returns a char *; marked
 * _must_check_.
 * NOTE(review): presumably returns a newly allocated absolute path built from
 * the current working directory, owned by the caller - confirm.
 */
char *path_make_absolute_cwd(const char *p) _must_check_ __attribute__((nonnull(1)));
|
|
/*
 * Takes a path @path, an int length @len and a permission @mode; returns int.
 * NOTE(review): presumably creates the directory named by the first @len
 * bytes of @path and any missing parents, returning 0 or a negative error
 * code - confirm against the definition.
 */
int mkdir_p(const char *path, int len, mode_t mode);
|
|
/*
 * Takes a path @path and a permission @mode; returns int.
 * NOTE(review): presumably creates the parent directories of @path but not
 * @path itself, returning 0 or a negative error code - confirm.
 */
int mkdir_parents(const char *path, mode_t mode);
|
|
/*
 * Takes an input string @alias and an output buffer @buf (both declared
 * nonnull; @buf is declared as PATH_MAX bytes) plus an optional-looking
 * length out-pointer @len. Returns int; marked _must_check_.
 * NOTE(review): presumably writes a normalized form of @alias into @buf and
 * stores its length in *@len when @len is non-NULL - confirm.
 */
int alias_normalize(const char *alias, char buf[PATH_MAX], size_t *len) _must_check_ __attribute__((nonnull(1,2)));
|
|
/*
 * Takes an input string @modname and an output buffer @buf (both declared
 * nonnull; @buf is declared as PATH_MAX bytes) plus a length out-pointer
 * @len that is not declared nonnull. Returns char *.
 * NOTE(review): presumably writes a normalized module name into @buf and
 * returns @buf - confirm against the definition.
 */
char *modname_normalize(const char *modname, char buf[PATH_MAX], size_t *len) __attribute__((nonnull(1, 2)));
|
|
/*
 * Takes a path @path, an output buffer @buf (declared nonnull, PATH_MAX
 * bytes) and a length out-pointer @len. Only @buf is declared nonnull.
 * Returns char *.
 * NOTE(review): presumably derives a module name from the file name in @path
 * and returns NULL on failure - confirm against the definition.
 */
char *path_to_modname(const char *path, char buf[PATH_MAX], size_t *len) __attribute__((nonnull(2)));
|
|
|
|
/*
 * Read-only table of extension descriptors, defined in another translation
 * unit. Each entry holds an extension string @ext and a size_t @len.
 * NOTE(review): presumably @len is strlen(@ext) and the table ends with a
 * sentinel entry - confirm at the definition.
 */
extern const struct kmod_ext {
	const char *ext;
	size_t len;
} kmod_exts[];
|
|
/* NOTE(review): presumably the index in kmod_exts[] of the uncompressed-module extension - confirm. */
#define KMOD_EXT_UNC 0
|
|
/*
 * Boolean predicate on the path string @path (declared nonnull) with length
 * @len.
 * NOTE(review): presumably true when @path ends with one of the extensions
 * listed in kmod_exts[] - confirm against the definition.
 */
bool path_ends_with_kmod_ext(const char *path, size_t len) __attribute__((nonnull(1)));
|
|
|
|
/*
 * Takes a read-only struct stat and returns an unsigned long long.
 * NOTE(review): presumably the modification time of @st as a single integer
 * timestamp (units to be confirmed against the definition).
 */
unsigned long long stat_mstamp(const struct stat *st);
|
|
/*
 * Takes a read-only struct timespec and returns an unsigned long long.
 * NOTE(review): the name suggests the value of @ts in microseconds - confirm.
 */
unsigned long long ts_usec(const struct timespec *ts);
|
|
|
|
/*
 * Load the object that @ptr points to without assuming @ptr is suitably
 * aligned for its type, and yield that value.
 *
 * The pointer is reinterpreted as a pointer to a packed one-member struct, so
 * the compiler emits an access that is valid at any address. Relies on GNU
 * extensions (statement expressions, __typeof__, packed attribute);
 * __typeof__ is used rather than typeof so the header also works in strict
 * ISO modes such as -std=c11. @ptr is evaluated once.
 */
#define get_unaligned(ptr)				\
	({						\
		struct __attribute__((packed)) {	\
			__typeof__(*(ptr)) __v;		\
		} *__p = (__typeof__(__p)) (ptr);	\
		__p->__v;				\
	})
|
|
|
|
/*
 * Store @val into the object that @ptr points to without assuming @ptr is
 * suitably aligned for its type.
 *
 * The pointer is reinterpreted as a pointer to a packed one-member struct, so
 * the compiler emits an access that is valid at any address. Relies on GNU
 * extensions (__typeof__, packed attribute); __typeof__ is used rather than
 * typeof so the header also works in strict ISO modes such as -std=c11.
 * @val and @ptr are each evaluated once.
 */
#define put_unaligned(val, ptr)				\
	do {						\
		struct __attribute__((packed)) {	\
			__typeof__(*(ptr)) __v;		\
		} *__p = (__typeof__(__p)) (ptr);	\
		__p->__v = (val);			\
	} while (0)
|
|
|
|
/*
 * Round @u up to the nearest power of two; a value that already is a power
 * of two is returned unchanged.
 *
 * The result is derived from the number of leading zero bits of @u - 1. The
 * inputs for which that expression alone would be undefined are handled
 * explicitly:
 *  - @u <= 1: @u - 1 is 0 (__builtin_clz(0) is undefined) or wraps to
 *    UINT_MAX (which would shift by the full width of the type). Returns 1.
 *  - @u above the largest power of two that fits in unsigned int: the result
 *    is not representable, so 0 is returned.
 * The shift operand is unsigned (1U) so that producing the top bit does not
 * overflow a signed int.
 */
static _always_inline_ unsigned int ALIGN_POWER2(unsigned int u)
{
	const unsigned int bits = sizeof(u) * CHAR_BIT;

	if (u <= 1)
		return 1;
	if (u > 1U << (bits - 1))
		return 0;

	return 1U << (bits - __builtin_clz(u - 1));
}
|