mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-22 21:15:44 +07:00
166a0f780a
There is a slightly faster way (in terms of the number of instructions being used) to calculate the position of a middle element, preserving integer overflow safeness. ./scripts/bloat-o-meter lib/bsearch.o.old lib/bsearch.o.new add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-24 (-24) function old new delta bsearch 122 98 -24 TEST INT array of size 100001, elements [0..100000]. gcc 7.1, Os, x86_64. a) bsearch() of existing key "100001 - 2": BASE ==== $ perf stat ./a.out Performance counter stats for './a.out': 619.445196 task-clock:u (msec) # 0.999 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 133 page-faults:u # 0.215 K/sec 1,949,517,279 cycles:u # 3.147 GHz (83.06%) 181,017,938 stalled-cycles-frontend:u # 9.29% frontend cycles idle (83.05%) 82,959,265 stalled-cycles-backend:u # 4.26% backend cycles idle (67.02%) 4,355,706,383 instructions:u # 2.23 insn per cycle # 0.04 stalled cycles per insn (83.54%) 1,051,539,242 branches:u # 1697.550 M/sec (83.54%) 15,263,381 branch-misses:u # 1.45% of all branches (83.43%) 0.620082548 seconds time elapsed PATCHED ======= $ perf stat ./a.out Performance counter stats for './a.out': 475.097316 task-clock:u (msec) # 0.999 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 135 page-faults:u # 0.284 K/sec 1,487,467,717 cycles:u # 3.131 GHz (82.95%) 186,537,162 stalled-cycles-frontend:u # 12.54% frontend cycles idle (82.93%) 28,797,869 stalled-cycles-backend:u # 1.94% backend cycles idle (67.10%) 3,807,564,203 instructions:u # 2.56 insn per cycle # 0.05 stalled cycles per insn (83.57%) 1,049,344,291 branches:u # 2208.693 M/sec (83.60%) 5,485 branch-misses:u # 0.00% of all branches (83.58%) 0.475760235 seconds time elapsed b) bsearch() of un-existing key "100001 + 2": BASE ==== $ perf stat ./a.out Performance counter stats for './a.out': 499.244480 task-clock:u (msec) # 0.999 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 132 page-faults:u # 0.264 K/sec 1,571,194,855 cycles:u # 3.147 GHz (83.18%) 13,450,980 stalled-cycles-frontend:u # 0.86% frontend cycles idle (83.18%) 21,256,072 stalled-cycles-backend:u # 1.35% backend cycles idle (66.78%) 4,171,197,909 instructions:u # 2.65 insn per cycle # 0.01 stalled cycles per insn (83.68%) 1,009,175,281 branches:u # 2021.405 M/sec (83.79%) 3,122 branch-misses:u # 0.00% of all branches (83.37%) 0.499871144 seconds time elapsed PATCHED ======= $ perf stat ./a.out Performance counter stats for './a.out': 399.023499 task-clock:u (msec) # 0.998 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 134 page-faults:u # 0.336 K/sec 1,245,793,991 cycles:u # 3.122 GHz (83.39%) 11,529,273 stalled-cycles-frontend:u # 0.93% frontend cycles idle (83.46%) 12,116,311 stalled-cycles-backend:u # 0.97% backend cycles idle (66.92%) 3,679,710,005 instructions:u # 2.95 insn per cycle # 0.00 stalled cycles per insn (83.47%) 1,009,792,625 branches:u # 2530.660 M/sec (83.46%) 2,590 branch-misses:u # 0.00% of all branches (83.12%) 0.399733539 seconds time elapsed Link: http://lkml.kernel.org/r/20170607150457.5905-1-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
56 lines
1.5 KiB
C
56 lines
1.5 KiB
C
/*
|
|
* A generic implementation of binary search for the Linux kernel
|
|
*
|
|
* Copyright (C) 2008-2009 Ksplice, Inc.
|
|
* Author: Tim Abbott <tabbott@ksplice.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License as
|
|
* published by the Free Software Foundation; version 2.
|
|
*/
|
|
|
|
#include <linux/export.h>
|
|
#include <linux/bsearch.h>
|
|
|
|
/*
|
|
* bsearch - binary search an array of elements
|
|
* @key: pointer to item being searched for
|
|
* @base: pointer to first element to search
|
|
* @num: number of elements
|
|
* @size: size of each element
|
|
* @cmp: pointer to comparison function
|
|
*
|
|
* This function does a binary search on the given array. The
|
|
* contents of the array should already be in ascending sorted order
|
|
* under the provided comparison function.
|
|
*
|
|
* Note that the key need not have the same type as the elements in
|
|
* the array, e.g. key could be a string and the comparison function
|
|
* could compare the string with the struct's name field. However, if
|
|
* the key and elements in the array are of the same type, you can use
|
|
* the same comparison function for both sort() and bsearch().
|
|
*/
|
|
void *bsearch(const void *key, const void *base, size_t num, size_t size,
|
|
int (*cmp)(const void *key, const void *elt))
|
|
{
|
|
const char *pivot;
|
|
int result;
|
|
|
|
while (num > 0) {
|
|
pivot = base + (num >> 1) * size;
|
|
result = cmp(key, pivot);
|
|
|
|
if (result == 0)
|
|
return (void *)pivot;
|
|
|
|
if (result > 0) {
|
|
base = pivot + size;
|
|
num--;
|
|
}
|
|
num >>= 1;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
EXPORT_SYMBOL(bsearch);
|