2005-04-17 05:20:36 +07:00
|
|
|
#ifndef _LINUX_MODULE_H
|
|
|
|
#define _LINUX_MODULE_H
|
|
|
|
/*
|
|
|
|
* Dynamic loading of modules into the kernel.
|
|
|
|
*
|
|
|
|
* Rewritten by Richard Henderson <rth@tamu.edu> Dec 1996
|
|
|
|
* Rewritten again by Rusty Russell, 2002
|
|
|
|
*/
|
|
|
|
#include <linux/list.h>
|
|
|
|
#include <linux/stat.h>
|
|
|
|
#include <linux/compiler.h>
|
|
|
|
#include <linux/cache.h>
|
|
|
|
#include <linux/kmod.h>
|
|
|
|
#include <linux/elf.h>
|
|
|
|
#include <linux/stringify.h>
|
|
|
|
#include <linux/kobject.h>
|
|
|
|
#include <linux/moduleparam.h>
|
tracing: Kernel Tracepoints
Implementation of kernel tracepoints. Inspired from the Linux Kernel
Markers. Allows complete typing verification by declaring both tracing
statement inline functions and probe registration/unregistration static
inline functions within the same macro "DEFINE_TRACE". No format string
is required. See the tracepoint Documentation and Samples patches for
usage examples.
Taken from the documentation patch :
"A tracepoint placed in code provides a hook to call a function (probe)
that you can provide at runtime. A tracepoint can be "on" (a probe is
connected to it) or "off" (no probe is attached). When a tracepoint is
"off" it has no effect, except for adding a tiny time penalty (checking
a condition for a branch) and space penalty (adding a few bytes for the
function call at the end of the instrumented function and adds a data
structure in a separate section). When a tracepoint is "on", the
function you provide is called each time the tracepoint is executed, in
the execution context of the caller. When the function provided ends its
execution, it returns to the caller (continuing from the tracepoint
site).
You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters, which
prototypes are described in a tracepoint declaration placed in a header
file."
Addition and removal of tracepoints is synchronized by RCU using the
scheduler (and preempt_disable) as guarantees to find a quiescent state
(this is really RCU "classic"). The update side uses rcu_barrier_sched()
with call_rcu_sched() and the read/execute side uses
"preempt_disable()/preempt_enable()".
We make sure the previous array containing probes, which has been
scheduled for deletion by the rcu callback, is indeed freed before we
proceed to the next update. It therefore limits the rate of modification
of a single tracepoint to one update per RCU period. The objective here
is to permit fast batch add/removal of probes on _different_
tracepoints.
Changelog :
- Use #name ":" #proto as string to identify the tracepoint in the
tracepoint table. This will make sure no type mismatch happens due to
connexion of a probe with the wrong type to a tracepoint declared with
the same name in a different header.
- Add tracepoint_entry_free_old.
- Change __TO_TRACE to get rid of the 'i' iterator.
Masami Hiramatsu <mhiramat@redhat.com> :
Tested on x86-64.
Performance impact of a tracepoint : same as markers, except that it
adds about 70 bytes of instructions in an unlikely branch of each
instrumented function (the for loop, the stack setup and the function
call). It currently adds a memory read, a test and a conditional branch
at the instrumentation site (in the hot path). Immediate values will
eventually change this into a load immediate, test and branch, which
removes the memory read which will make the i-cache impact smaller
(changing the memory read for a load immediate removes 3-4 bytes per
site on x86_32 (depending on mov prefixes), or 7-8 bytes on x86_64, it
also saves the d-cache hit).
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added.
Quoting Hideo Aoki about Markers :
I evaluated overhead of kernel marker using linux-2.6-sched-fixes git
tree, which includes several markers for LTTng, using an ia64 server.
While the immediate trace mark feature isn't implemented on ia64, there
is no major performance regression. So, I think that we don't have any
issues to propose merging marker point patches into Linus's tree from
the viewpoint of performance impact.
I prepared two kernels to evaluate. The first one was compiled without
CONFIG_MARKERS. The second one was compiled with CONFIG_MARKERS enabled.
I downloaded the original hackbench from the following URL:
http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c
I ran hackbench 5 times in each condition and calculated the average and
difference between the kernels.
The parameter of hackbench: every 50 from 50 to 800
The number of CPUs of the server: 2, 4, and 8
Below are the results. As you can see, no major performance regression
was found in any case. Even if the number of processes increases, the
differences between the marker-enabled kernel and the marker-disabled kernel
don't increase. Moreover, if the number of CPUs increases, the differences
don't increase either.
Curiously, marker-enabled kernel is better than marker-disabled kernel
in more than half cases, although I guess it comes from the difference
of memory access pattern.
* 2 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 4.811 | 4.872 | +0.061 | +1.27 |
100 | 9.854 | 10.309 | +0.454 | +4.61 |
150 | 15.602 | 15.040 | -0.562 | -3.6 |
200 | 20.489 | 20.380 | -0.109 | -0.53 |
250 | 25.798 | 25.652 | -0.146 | -0.56 |
300 | 31.260 | 30.797 | -0.463 | -1.48 |
350 | 36.121 | 35.770 | -0.351 | -0.97 |
400 | 42.288 | 42.102 | -0.186 | -0.44 |
450 | 47.778 | 47.253 | -0.526 | -1.1 |
500 | 51.953 | 52.278 | +0.325 | +0.63 |
550 | 58.401 | 57.700 | -0.701 | -1.2 |
600 | 63.334 | 63.222 | -0.112 | -0.18 |
650 | 68.816 | 68.511 | -0.306 | -0.44 |
700 | 74.667 | 74.088 | -0.579 | -0.78 |
750 | 78.612 | 79.582 | +0.970 | +1.23 |
800 | 85.431 | 85.263 | -0.168 | -0.2 |
--------------------------------------------------------------
* 4 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.586 | 2.584 | -0.003 | -0.1 |
100 | 5.254 | 5.283 | +0.030 | +0.56 |
150 | 8.012 | 8.074 | +0.061 | +0.76 |
200 | 11.172 | 11.000 | -0.172 | -1.54 |
250 | 13.917 | 14.036 | +0.119 | +0.86 |
300 | 16.905 | 16.543 | -0.362 | -2.14 |
350 | 19.901 | 20.036 | +0.135 | +0.68 |
400 | 22.908 | 23.094 | +0.186 | +0.81 |
450 | 26.273 | 26.101 | -0.172 | -0.66 |
500 | 29.554 | 29.092 | -0.461 | -1.56 |
550 | 32.377 | 32.274 | -0.103 | -0.32 |
600 | 35.855 | 35.322 | -0.533 | -1.49 |
650 | 39.192 | 38.388 | -0.804 | -2.05 |
700 | 41.744 | 41.719 | -0.025 | -0.06 |
750 | 45.016 | 44.496 | -0.520 | -1.16 |
800 | 48.212 | 47.603 | -0.609 | -1.26 |
--------------------------------------------------------------
* 8 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.094 | 2.072 | -0.022 | -1.07 |
100 | 4.162 | 4.273 | +0.111 | +2.66 |
150 | 6.485 | 6.540 | +0.055 | +0.84 |
200 | 8.556 | 8.478 | -0.078 | -0.91 |
250 | 10.458 | 10.258 | -0.200 | -1.91 |
300 | 12.425 | 12.750 | +0.325 | +2.62 |
350 | 14.807 | 14.839 | +0.032 | +0.22 |
400 | 16.801 | 16.959 | +0.158 | +0.94 |
450 | 19.478 | 19.009 | -0.470 | -2.41 |
500 | 21.296 | 21.504 | +0.208 | +0.98 |
550 | 23.842 | 23.979 | +0.137 | +0.57 |
600 | 26.309 | 26.111 | -0.198 | -0.75 |
650 | 28.705 | 28.446 | -0.259 | -0.9 |
700 | 31.233 | 31.394 | +0.161 | +0.52 |
750 | 34.064 | 33.720 | -0.344 | -1.01 |
800 | 36.320 | 36.114 | -0.206 | -0.57 |
--------------------------------------------------------------
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-18 23:16:16 +07:00
|
|
|
#include <linux/tracepoint.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2010-01-05 13:34:50 +07:00
|
|
|
#include <linux/percpu.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <asm/module.h>
|
|
|
|
|
2009-08-17 15:56:28 +07:00
|
|
|
#include <trace/events/module.h>
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Not Yet Implemented */
|
|
|
|
#define MODULE_SUPPORTED_DEVICE(name)
|
|
|
|
|
2009-11-08 04:03:54 +07:00
|
|
|
/*
 * Some toolchains use a `_' prefix for all user symbols.
 * MODULE_SYMBOL_PREFIX is that prefix as a string literal: it takes the value
 * of CONFIG_SYMBOL_PREFIX when that is defined and is the empty string
 * otherwise.  It is pasted in front of stringified symbol names by
 * symbol_get() and __EXPORT_SYMBOL().
 */
|
|
|
|
#ifdef CONFIG_SYMBOL_PREFIX
|
|
|
|
#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX
|
|
|
|
#else
|
2005-04-17 05:20:36 +07:00
|
|
|
#define MODULE_SYMBOL_PREFIX ""
|
|
|
|
#endif
|
|
|
|
|
2008-10-22 22:00:22 +07:00
|
|
|
#define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
 * One exported-symbol record.  __EXPORT_SYMBOL() defines one instance per
 * exported symbol and places it in a "___ksymtab..." section.
 */
struct kernel_symbol
|
|
|
|
{
|
|
|
|
/* Address of the exported symbol, stored as an unsigned long. */
unsigned long value;
|
|
|
|
/* Symbol name string (MODULE_SYMBOL_PREFIX followed by the symbol name). */
const char *name;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * A CRC value paired with a fixed-size name buffer of MODULE_NAME_LEN bytes.
 * NOTE(review): presumably the per-symbol version record used with
 * CONFIG_MODVERSIONS -- no user is visible in this header, confirm against
 * the module loader.
 */
struct modversion_info
|
|
|
|
{
|
|
|
|
unsigned long crc;
|
|
|
|
char name[MODULE_NAME_LEN];
|
|
|
|
};
|
|
|
|
|
|
|
|
struct module;
|
|
|
|
|
|
|
|
/*
 * A per-module attribute: a struct attribute plus a set of callbacks that all
 * take the owning struct module.  MODULE_VERSION() (built-in case) fills in
 * only .attr and .show, so the remaining callbacks may be left unset.
 * NOTE(review): show/store look like sysfs read/write handlers operating on a
 * caller-supplied buffer; the exact contracts of setup/test/free are not
 * visible in this header -- confirm against kernel/module.c.
 */
struct module_attribute {
|
|
|
|
struct attribute attr;
|
|
|
|
ssize_t (*show)(struct module_attribute *, struct module *, char *);
|
|
|
|
ssize_t (*store)(struct module_attribute *, struct module *,
|
|
|
|
const char *, size_t count);
|
[PATCH] modules: add version and srcversion to sysfs
This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).
/sys/module/e1000
|-- srcversion
`-- version
This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.
Why put this in sysfs?
a) Tools like DKMS, which deal with changing out individual kernel
modules without replacing the whole kernel, can behave smarter if they
can tell the version of a given module. The autoinstaller feature, for
example, which determines if your system has a "good" version of a
driver (i.e. if the one provided by DKMS has a newer verson than that
provided by the kernel package installed), and to automatically compile
and install a newer version if DKMS has it but your kernel doesn't yet
have that version.
b) Because sysadmins manually, or with tools like DKMS, can switch out
modules on the file system, you can't count on 'modinfo foo.ko', which
looks at /lib/modules/${kernelver}/... actually matching what is loaded
into the kernel already. Hence asking sysfs for this.
c) as the unbind-driver-from-device work takes shape, it will be
possible to rebind a driver that's built-in (no .ko to modinfo for the
version) to a newly loaded module. sysfs will have the
currently-built-in version info, for comparison.
d) tech support scripts can then easily grab the version info for what's
running presently - a question I get often.
There has been renewed interest in this patch on linux-scsi by driver
authors.
As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new. Compiled and run on
x86 and x86_64.
From: Matthew Dobson <colpatch@us.ibm.com>
build fix
From: Thierry Vignaud <tvignaud@mandriva.com>
build fix
From: Matthew Dobson <colpatch@us.ibm.com>
warning fix
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 12:05:15 +07:00
|
|
|
void (*setup)(struct module *, const char *);
|
|
|
|
int (*test)(struct module *);
|
|
|
|
void (*free)(struct module *);
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
2010-12-16 05:00:19 +07:00
|
|
|
/*
 * Version attribute record created by MODULE_VERSION() when the code is
 * built in and CONFIG_SYSFS is enabled.  It wraps a module_attribute together
 * with the owning module's name (KBUILD_MODNAME) and its version string.
 * The type is aligned to sizeof(void *).
 */
struct module_version_attribute {
|
|
|
|
struct module_attribute mattr;
|
|
|
|
const char *module_name;
|
|
|
|
const char *version;
|
2011-02-05 04:30:10 +07:00
|
|
|
} __attribute__ ((__aligned__(sizeof(void *))));
|
2010-12-16 05:00:19 +07:00
|
|
|
|
2011-02-08 07:02:27 +07:00
|
|
|
/*
 * Defined outside this header.  MODULE_VERSION() installs it as the .show
 * callback of the "version" attribute it creates for built-in code.
 */
extern ssize_t __modver_version_show(struct module_attribute *,
|
|
|
|
struct module *, char *);
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * Kobject wrapper for a module; struct module embeds one as its `mkobj`
 * member under the "Sysfs stuff" heading.
 */
struct module_kobject
|
|
|
|
{
|
|
|
|
struct kobject kobj;
|
|
|
|
/* The module this kobject belongs to. */
struct module *mod;
|
2007-01-16 02:22:02 +07:00
|
|
|
/* NOTE(review): presumably the "drivers" sysfs subdirectory -- confirm. */
struct kobject *drivers_dir;
|
2008-10-22 22:00:22 +07:00
|
|
|
/* Opaque here: struct module_param_attrs is not defined in this header. */
struct module_param_attrs *mp;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
/* These are either module local, or the kernel's dummy ones. */
|
|
|
|
extern int init_module(void);
|
|
|
|
extern void cleanup_module(void);
|
|
|
|
|
|
|
|
/* Archs provide a method of finding the correct exception table. */
|
|
|
|
struct exception_table_entry;
|
|
|
|
|
|
|
|
const struct exception_table_entry *
|
|
|
|
search_extable(const struct exception_table_entry *first,
|
|
|
|
const struct exception_table_entry *last,
|
|
|
|
unsigned long value);
|
|
|
|
void sort_extable(struct exception_table_entry *start,
|
|
|
|
struct exception_table_entry *finish);
|
|
|
|
void sort_main_extable(void);
|
2009-06-13 10:47:03 +07:00
|
|
|
void trim_init_extable(struct module *m);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
 * When compiled as a module (MODULE defined):
 *   - MODULE_GENERIC_TABLE(gtype, name) declares __mod_<gtype>_table, of type
 *     const struct <gtype>_id, as an alias of the table called `name`;
 *   - THIS_MODULE is the address of this module's __this_module.
 * Otherwise MODULE_GENERIC_TABLE() expands to nothing and THIS_MODULE is a
 * null struct module pointer.
 */
#ifdef MODULE
|
|
|
|
#define MODULE_GENERIC_TABLE(gtype,name) \
|
|
|
|
extern const struct gtype##_id __mod_##gtype##_table \
|
|
|
|
__attribute__ ((unused, alias(__stringify(name))))
|
|
|
|
|
|
|
|
extern struct module __this_module;
|
|
|
|
#define THIS_MODULE (&__this_module)
|
|
|
|
#else /* !MODULE */
|
|
|
|
#define MODULE_GENERIC_TABLE(gtype,name)
|
|
|
|
#define THIS_MODULE ((struct module *)0)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Generic info of form tag = "info" */
|
|
|
|
#define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info)
|
|
|
|
|
|
|
|
/* For userspace: you can also call me... */
|
|
|
|
#define MODULE_ALIAS(_alias) MODULE_INFO(alias, _alias)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The following license idents are currently accepted as indicating free
|
|
|
|
* software modules
|
|
|
|
*
|
|
|
|
* "GPL" [GNU Public License v2 or later]
|
|
|
|
* "GPL v2" [GNU Public License v2]
|
|
|
|
* "GPL and additional rights" [GNU Public License v2 rights and more]
|
|
|
|
* "Dual BSD/GPL" [GNU Public License v2
|
|
|
|
* or BSD license choice]
|
2006-06-23 16:05:13 +07:00
|
|
|
* "Dual MIT/GPL" [GNU Public License v2
|
|
|
|
* or MIT license choice]
|
2005-04-17 05:20:36 +07:00
|
|
|
* "Dual MPL/GPL" [GNU Public License v2
|
|
|
|
* or Mozilla license choice]
|
|
|
|
*
|
|
|
|
* The following other idents are available
|
|
|
|
*
|
|
|
|
* "Proprietary" [Non free products]
|
|
|
|
*
|
|
|
|
* There are dual licensed components, but when running with Linux it is the
|
|
|
|
* GPL that is relevant so this is a non issue. Similarly LGPL linked with GPL
|
|
|
|
* is a GPL combined work.
|
|
|
|
*
|
|
|
|
* This exists for several reasons
|
|
|
|
* 1. So modinfo can show license info for users wanting to vet their setup
|
|
|
|
* is free
|
|
|
|
* 2. So the community can ignore bug reports including proprietary modules
|
|
|
|
* 3. So vendors can do likewise based on their own policies
|
|
|
|
*/
|
|
|
|
#define MODULE_LICENSE(_license) MODULE_INFO(license, _license)
|
|
|
|
|
2009-09-25 13:32:58 +07:00
|
|
|
/*
|
|
|
|
* Author(s), use "Name <email>" or just "Name", for multiple
|
|
|
|
* authors use multiple MODULE_AUTHOR() statements/lines.
|
|
|
|
*/
|
2005-04-17 05:20:36 +07:00
|
|
|
#define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)
|
|
|
|
|
|
|
|
/* What your module does. */
|
|
|
|
#define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description)
|
|
|
|
|
|
|
|
/* One for each parameter, describing how to use it. Some files do
|
|
|
|
multiple of these per line, so can't just use MODULE_INFO. */
|
|
|
|
#define MODULE_PARM_DESC(_parm, desc) \
|
|
|
|
__MODULE_INFO(parm, _parm, #_parm ":" desc)
|
|
|
|
|
|
|
|
/*
 * Forwards to MODULE_GENERIC_TABLE() with "_device" appended to `type`, so
 * in a module build the aliased table has type struct <type>_device_id.
 */
#define MODULE_DEVICE_TABLE(type,name) \
|
|
|
|
MODULE_GENERIC_TABLE(type##_device,name)
|
|
|
|
|
|
|
|
/* Version of form [<epoch>:]<version>[-<extra-version>].
|
|
|
|
Or for CVS/RCS ID version, everything but the number is stripped.
|
|
|
|
<epoch>: A (small) unsigned integer which allows you to start versions
|
|
|
|
anew. If not mentioned, it's zero. eg. "2:1.0" is after
|
|
|
|
"1:2.0".
|
|
|
|
<version>: The <version> may contain only alphanumerics and the
|
|
|
|
character `.'. Ordered by numeric sort for numeric parts,
|
|
|
|
ascii sort for ascii parts (as per RPM or DEB algorithm).
|
|
|
|
<extraversion>: Like <version>, but inserted for local
|
|
|
|
customizations, eg "rh3" or "rusty1".
|
|
|
|
|
|
|
|
Using this automatically adds a checksum of the .c files and the
|
|
|
|
local headers in "srcversion".
|
|
|
|
*/
|
2010-12-16 05:00:19 +07:00
|
|
|
|
2011-01-25 03:32:51 +07:00
|
|
|
/*
 * MODULE_VERSION(_version):
 *   - in a module build, or when CONFIG_SYSFS is not set, it is simply
 *     MODULE_INFO(version, _version);
 *   - otherwise it defines a static struct module_version_attribute whose
 *     attribute is named "version" with mode S_IRUGO and is shown by
 *     __modver_version_show, records KBUILD_MODNAME and _version in it, and
 *     places a pointer to that record in the "__modver" section.
 */
#if defined(MODULE) || !defined(CONFIG_SYSFS)
|
2005-04-17 05:20:36 +07:00
|
|
|
#define MODULE_VERSION(_version) MODULE_INFO(version, _version)
|
2010-12-16 05:00:19 +07:00
|
|
|
#else
|
|
|
|
#define MODULE_VERSION(_version) \
|
2011-02-08 07:02:25 +07:00
|
|
|
static struct module_version_attribute ___modver_attr = { \
|
2010-12-16 05:00:19 +07:00
|
|
|
.mattr = { \
|
|
|
|
.attr = { \
|
|
|
|
.name = "version", \
|
|
|
|
.mode = S_IRUGO, \
|
|
|
|
}, \
|
|
|
|
.show = __modver_version_show, \
|
|
|
|
}, \
|
|
|
|
.module_name = KBUILD_MODNAME, \
|
|
|
|
.version = _version, \
|
2011-02-08 07:02:25 +07:00
|
|
|
}; \
|
|
|
|
static const struct module_version_attribute \
|
|
|
|
__used __attribute__ ((__section__ ("__modver"))) \
|
|
|
|
* __moduleparam_const __modver_attr = &___modver_attr
|
2010-12-16 05:00:19 +07:00
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2006-08-29 05:08:21 +07:00
|
|
|
/* Optional firmware file (or files) needed by the module
|
|
|
|
* format is simply firmware file name. Multiple firmware
|
|
|
|
* files require multiple MODULE_FIRMWARE() specifiers */
|
|
|
|
#define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware)
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Given an address, look for it in the exception tables */
|
|
|
|
const struct exception_table_entry *search_exception_tables(unsigned long add);
|
|
|
|
|
|
|
|
struct notifier_block;
|
|
|
|
|
|
|
|
#ifdef CONFIG_MODULES
|
|
|
|
|
2010-03-11 06:24:06 +07:00
|
|
|
extern int modules_disabled; /* for sysctl */
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * Get/put a kernel symbol (calls must be symmetric).
 * symbol_get(x) stringifies x, prepends MODULE_SYMBOL_PREFIX, passes the
 * resulting name to __symbol_get() and casts the returned pointer to the
 * type of &x.
 */
|
|
|
|
void *__symbol_get(const char *symbol);
|
|
|
|
void *__symbol_get_gpl(const char *symbol);
|
|
|
|
#define symbol_get(x) ((typeof(&x))(__symbol_get(MODULE_SYMBOL_PREFIX #x)))
|
|
|
|
|
2010-06-06 00:17:35 +07:00
|
|
|
/* modules using other modules: kdb wants to see this. */
|
|
|
|
/*
 * Records a relationship between two modules.  It carries two list nodes so
 * the same record can sit on two lists at once, plus pointers to both
 * modules involved.
 * NOTE(review): presumably `source` is the module doing the using and
 * `target` the module being used -- confirm against kernel/module.c.
 */
struct module_use {
|
|
|
|
struct list_head source_list;
|
|
|
|
struct list_head target_list;
|
|
|
|
struct module *source, *target;
|
|
|
|
};
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#ifndef __GENKSYMS__
|
|
|
|
/*
 * __CRC_SYMBOL(sym, sec): with CONFIG_MODVERSIONS it declares the external
 * __crc_<sym> and stores its address, as an unsigned long, in a
 * __kcrctab_<sym> object placed in section "___kcrctab" sec "+" sym.
 * Without CONFIG_MODVERSIONS it expands to nothing.
 */
#ifdef CONFIG_MODVERSIONS
|
|
|
|
/* Mark the CRC weak since genksyms apparently decides not to
|
|
|
|
* generate checksums for some symbols */
|
|
|
|
#define __CRC_SYMBOL(sym, sec) \
|
|
|
|
extern void *__crc_##sym __attribute__((weak)); \
|
|
|
|
static const unsigned long __kcrctab_##sym \
|
2008-01-25 04:16:20 +07:00
|
|
|
__used \
|
2011-04-14 19:59:39 +07:00
|
|
|
__attribute__((section("___kcrctab" sec "+" #sym), unused)) \
|
2005-04-17 05:20:36 +07:00
|
|
|
= (unsigned long) &__crc_##sym;
|
|
|
|
#else
|
|
|
|
#define __CRC_SYMBOL(sym, sec)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
 * For every exported symbol, place a struct in the __ksymtab section.
 * __EXPORT_SYMBOL(sym, sec) expands to, in order:
 *   - a redeclaration of sym with its own type (extern typeof(sym) sym);
 *   - __CRC_SYMBOL(sym, sec);
 *   - __kstrtab_<sym>, the name string (MODULE_SYMBOL_PREFIX followed by the
 *     symbol name) placed in section "__ksymtab_strings";
 *   - __ksymtab_<sym>, a struct kernel_symbol holding the address of sym and
 *     a pointer to that name string, placed in section
 *     "___ksymtab" sec "+" sym.
 * The expansion has no trailing semicolon; the user of the macro supplies it.
 */
|
|
|
|
#define __EXPORT_SYMBOL(sym, sec) \
|
2006-03-24 13:07:34 +07:00
|
|
|
extern typeof(sym) sym; \
|
2005-04-17 05:20:36 +07:00
|
|
|
__CRC_SYMBOL(sym, sec) \
|
|
|
|
static const char __kstrtab_##sym[] \
|
2008-03-13 16:02:17 +07:00
|
|
|
__attribute__((section("__ksymtab_strings"), aligned(1))) \
|
2005-04-17 05:20:36 +07:00
|
|
|
= MODULE_SYMBOL_PREFIX #sym; \
|
|
|
|
static const struct kernel_symbol __ksymtab_##sym \
|
2008-01-25 04:16:20 +07:00
|
|
|
__used \
|
2011-04-14 19:59:39 +07:00
|
|
|
__attribute__((section("___ksymtab" sec "+" #sym), unused)) \
|
2005-04-17 05:20:36 +07:00
|
|
|
= { (unsigned long)&sym, __kstrtab_##sym }
|
|
|
|
|
|
|
|
/*
 * Public export macros.  Each one forwards to __EXPORT_SYMBOL() with a
 * different section-name suffix: "" for EXPORT_SYMBOL, "_gpl" for
 * EXPORT_SYMBOL_GPL, "_gpl_future" for EXPORT_SYMBOL_GPL_FUTURE, and
 * "_unused" / "_unused_gpl" for the EXPORT_UNUSED_SYMBOL variants.  The two
 * EXPORT_UNUSED_SYMBOL variants expand to nothing unless
 * CONFIG_UNUSED_SYMBOLS is defined.
 */
#define EXPORT_SYMBOL(sym) \
|
|
|
|
__EXPORT_SYMBOL(sym, "")
|
|
|
|
|
|
|
|
#define EXPORT_SYMBOL_GPL(sym) \
|
|
|
|
__EXPORT_SYMBOL(sym, "_gpl")
|
|
|
|
|
2006-03-21 04:17:13 +07:00
|
|
|
#define EXPORT_SYMBOL_GPL_FUTURE(sym) \
|
|
|
|
__EXPORT_SYMBOL(sym, "_gpl_future")
|
|
|
|
|
2006-06-28 18:26:45 +07:00
|
|
|
|
|
|
|
#ifdef CONFIG_UNUSED_SYMBOLS
|
|
|
|
#define EXPORT_UNUSED_SYMBOL(sym) __EXPORT_SYMBOL(sym, "_unused")
|
|
|
|
#define EXPORT_UNUSED_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_unused_gpl")
|
|
|
|
#else
|
|
|
|
#define EXPORT_UNUSED_SYMBOL(sym)
|
|
|
|
#define EXPORT_UNUSED_SYMBOL_GPL(sym)
|
|
|
|
#endif
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
 * Values for the `state` member of struct module.
 * NOTE(review): the names suggest "fully loaded", "being loaded" and
 * "being unloaded" respectively; the transitions are not visible in this
 * header -- confirm against the module loader.
 */
enum module_state
|
|
|
|
{
|
|
|
|
MODULE_STATE_LIVE,
|
|
|
|
MODULE_STATE_COMING,
|
|
|
|
MODULE_STATE_GOING,
|
|
|
|
};
|
|
|
|
|
|
|
|
struct module
|
|
|
|
{
|
|
|
|
enum module_state state;
|
|
|
|
|
|
|
|
/* Member of list of modules */
|
|
|
|
struct list_head list;
|
|
|
|
|
|
|
|
/* Unique handle for this module */
|
|
|
|
char name[MODULE_NAME_LEN];
|
|
|
|
|
|
|
|
/* Sysfs stuff. */
|
|
|
|
struct module_kobject mkobj;
|
2006-02-17 04:50:23 +07:00
|
|
|
struct module_attribute *modinfo_attrs;
|
[PATCH] modules: add version and srcversion to sysfs
This patch adds version and srcversion files to
/sys/module/${modulename} containing the version and srcversion fields
of the module's modinfo section (if present).
/sys/module/e1000
|-- srcversion
`-- version
This patch differs slightly from the version posted in January, as it
now uses the new kstrdup() call in -mm.
Why put this in sysfs?
a) Tools like DKMS, which deal with changing out individual kernel
modules without replacing the whole kernel, can behave smarter if they
can tell the version of a given module. The autoinstaller feature, for
example, which determines if your system has a "good" version of a
driver (i.e. if the one provided by DKMS has a newer version than that
provided by the kernel package installed), and to automatically compile
and install a newer version if DKMS has it but your kernel doesn't yet
have that version.
b) Because sysadmins manually, or with tools like DKMS, can switch out
modules on the file system, you can't count on 'modinfo foo.ko', which
looks at /lib/modules/${kernelver}/... actually matching what is loaded
into the kernel already. Hence asking sysfs for this.
c) as the unbind-driver-from-device work takes shape, it will be
possible to rebind a driver that's built-in (no .ko to modinfo for the
version) to a newly loaded module. sysfs will have the
currently-built-in version info, for comparison.
d) tech support scripts can then easily grab the version info for what's
running presently - a question I get often.
There has been renewed interest in this patch on linux-scsi by driver
authors.
As the idea originated from GregKH, I leave his Signed-off-by: intact,
though the implementation is nearly completely new. Compiled and run on
x86 and x86_64.
From: Matthew Dobson <colpatch@us.ibm.com>
build fix
From: Thierry Vignaud <tvignaud@mandriva.com>
build fix
From: Matthew Dobson <colpatch@us.ibm.com>
warning fix
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-24 12:05:15 +07:00
|
|
|
const char *version;
|
|
|
|
const char *srcversion;
|
2007-01-18 19:26:15 +07:00
|
|
|
struct kobject *holders_dir;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Exported symbols */
|
|
|
|
const struct kernel_symbol *syms;
|
|
|
|
const unsigned long *crcs;
|
2008-07-23 07:24:26 +07:00
|
|
|
unsigned int num_syms;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2009-04-01 02:05:29 +07:00
|
|
|
/* Kernel parameters. */
|
|
|
|
struct kernel_param *kp;
|
|
|
|
unsigned int num_kp;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* GPL-only exported symbols. */
|
|
|
|
unsigned int num_gpl_syms;
|
2008-07-23 07:24:26 +07:00
|
|
|
const struct kernel_symbol *gpl_syms;
|
2005-04-17 05:20:36 +07:00
|
|
|
const unsigned long *gpl_crcs;
|
|
|
|
|
2008-07-23 07:24:26 +07:00
|
|
|
#ifdef CONFIG_UNUSED_SYMBOLS
|
2006-06-28 18:26:45 +07:00
|
|
|
/* unused exported symbols. */
|
|
|
|
const struct kernel_symbol *unused_syms;
|
|
|
|
const unsigned long *unused_crcs;
|
2008-07-23 07:24:26 +07:00
|
|
|
unsigned int num_unused_syms;
|
|
|
|
|
2006-06-28 18:26:45 +07:00
|
|
|
/* GPL-only, unused exported symbols. */
|
|
|
|
unsigned int num_unused_gpl_syms;
|
2008-07-23 07:24:26 +07:00
|
|
|
const struct kernel_symbol *unused_gpl_syms;
|
2006-06-28 18:26:45 +07:00
|
|
|
const unsigned long *unused_gpl_crcs;
|
2008-07-23 07:24:26 +07:00
|
|
|
#endif
|
2006-06-28 18:26:45 +07:00
|
|
|
|
2006-03-21 04:17:13 +07:00
|
|
|
/* symbols that will be GPL-only in the near future. */
|
|
|
|
const struct kernel_symbol *gpl_future_syms;
|
|
|
|
const unsigned long *gpl_future_crcs;
|
2008-07-23 07:24:26 +07:00
|
|
|
unsigned int num_gpl_future_syms;
|
2006-03-21 04:17:13 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Exception table */
|
|
|
|
unsigned int num_exentries;
|
2008-10-22 22:00:13 +07:00
|
|
|
struct exception_table_entry *extable;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Startup function. */
|
|
|
|
int (*init)(void);
|
|
|
|
|
|
|
|
/* If this is non-NULL, vfree after init() returns */
|
|
|
|
void *module_init;
|
|
|
|
|
|
|
|
/* Here is the actual code + data, vfree'd on unload. */
|
|
|
|
void *module_core;
|
|
|
|
|
|
|
|
/* Here are the sizes of the init and core sections */
|
2008-07-23 07:24:27 +07:00
|
|
|
unsigned int init_size, core_size;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* The size of the executable code in each section. */
|
2008-07-23 07:24:27 +07:00
|
|
|
unsigned int init_text_size, core_text_size;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2010-11-17 04:35:16 +07:00
|
|
|
/* Size of RO sections of the module (text+rodata) */
|
|
|
|
unsigned int init_ro_size, core_ro_size;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Arch-specific module values */
|
|
|
|
struct mod_arch_specific arch;
|
|
|
|
|
2006-10-02 16:17:02 +07:00
|
|
|
unsigned int taints; /* same bits as kernel:tainted */
|
|
|
|
|
[PATCH] Generic BUG implementation
This patch adds common handling for kernel BUGs, for use by architectures as
they wish. The code is derived from arch/powerpc.
The advantages of having common BUG handling are:
- consistent BUG reporting across architectures
- shared implementation of out-of-line file/line data
- implement CONFIG_DEBUG_BUGVERBOSE consistently
This means that the inline impact of BUG is just the illegal instruction
itself, which is an improvement for i386 and x86-64.
A BUG is represented in the instruction stream as an illegal instruction,
which has file/line information associated with it. This extra information is
stored in the __bug_table section in the ELF file.
When the kernel gets an illegal instruction, it first confirms it might
possibly be from a BUG (ie, in kernel mode, the right illegal instruction).
It then calls report_bug(). This searches __bug_table for a matching
instruction pointer, and if found, prints the corresponding file/line
information. If report_bug() determines that it wasn't a BUG which caused the
trap, it returns BUG_TRAP_TYPE_NONE.
Some architectures (powerpc) implement WARN using the same mechanism; if the
illegal instruction was the result of a WARN, then report_bug() returns
BUG_TRAP_TYPE_WARN; otherwise it returns BUG_TRAP_TYPE_BUG.
lib/bug.c keeps a list of loaded modules which can be searched for __bug_table
entries. The architecture must call
module_bug_finalize()/module_bug_cleanup() from its corresponding
module_finalize/cleanup functions.
Unsetting CONFIG_DEBUG_BUGVERBOSE will reduce the kernel size by some amount.
At the very least, filename and line information will not be recorded for each
BUG, but architectures may decide to store no extra information per BUG at
all.
Unfortunately, gcc doesn't have a general way to mark an asm() as noreturn, so
architectures will generally have to include an infinite loop (or similar) in
the BUG code, so that gcc knows execution won't continue beyond that point.
gcc does have a __builtin_trap() operator which may be useful to achieve the
same effect, unfortunately it cannot be used to actually implement the BUG
itself, because there's no way to get the instruction's address for use in
generating the __bug_table entry.
[randy.dunlap@oracle.com: Handle BUG=n, GENERIC_BUG=n to prevent build errors]
[bunk@stusta.de: include/linux/bug.h must always #include <linux/module.h>]
Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Hugh Dickens <hugh@veritas.com>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-08 17:36:19 +07:00
|
|
|
#ifdef CONFIG_GENERIC_BUG
|
|
|
|
/* Support for BUG */
|
2008-07-23 07:24:26 +07:00
|
|
|
unsigned num_bugs;
|
[PATCH] Generic BUG implementation
This patch adds common handling for kernel BUGs, for use by architectures as
they wish. The code is derived from arch/powerpc.
The advantages of having common BUG handling are:
- consistent BUG reporting across architectures
- shared implementation of out-of-line file/line data
- implement CONFIG_DEBUG_BUGVERBOSE consistently
This means that the inline impact of BUG is just the illegal instruction
itself, which is an improvement for i386 and x86-64.
A BUG is represented in the instruction stream as an illegal instruction,
which has file/line information associated with it. This extra information is
stored in the __bug_table section in the ELF file.
When the kernel gets an illegal instruction, it first confirms it might
possibly be from a BUG (ie, in kernel mode, the right illegal instruction).
It then calls report_bug(). This searches __bug_table for a matching
instruction pointer, and if found, prints the corresponding file/line
information. If report_bug() determines that it wasn't a BUG which caused the
trap, it returns BUG_TRAP_TYPE_NONE.
Some architectures (powerpc) implement WARN using the same mechanism; if the
illegal instruction was the result of a WARN, then report_bug() returns
BUG_TRAP_TYPE_WARN; otherwise it returns BUG_TRAP_TYPE_BUG.
lib/bug.c keeps a list of loaded modules which can be searched for __bug_table
entries. The architecture must call
module_bug_finalize()/module_bug_cleanup() from its corresponding
module_finalize/cleanup functions.
Unsetting CONFIG_DEBUG_BUGVERBOSE will reduce the kernel size by some amount.
At the very least, filename and line information will not be recorded for each
BUG, but architectures may decide to store no extra information per BUG at
all.
Unfortunately, gcc doesn't have a general way to mark an asm() as noreturn, so
architectures will generally have to include an infinite loop (or similar) in
the BUG code, so that gcc knows execution won't continue beyond that point.
gcc does have a __builtin_trap() operator which may be useful to achieve the
same effect, unfortunately it cannot be used to actually implement the BUG
itself, because there's no way to get the instruction's address for use in
generating the __bug_table entry.
[randy.dunlap@oracle.com: Handle BUG=n, GENERIC_BUG=n to prevent build errors]
[bunk@stusta.de: include/linux/bug.h must always #include <linux/module.h>]
Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Andi Kleen <ak@muc.de>
Cc: Hugh Dickens <hugh@veritas.com>
Cc: Michael Ellerman <michael@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-08 17:36:19 +07:00
|
|
|
struct list_head bug_list;
|
|
|
|
struct bug_entry *bug_table;
|
2005-04-17 05:20:36 +07:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef CONFIG_KALLSYMS
|
2009-07-06 20:50:42 +07:00
|
|
|
/*
|
|
|
|
* We keep the symbol and string tables for kallsyms.
|
|
|
|
* The core_* fields below are temporary, loader-only (they
|
|
|
|
* could really be discarded after module init).
|
|
|
|
*/
|
|
|
|
Elf_Sym *symtab, *core_symtab;
|
|
|
|
unsigned int num_symtab, core_num_syms;
|
2009-07-06 20:51:44 +07:00
|
|
|
char *strtab, *core_strtab;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Section attributes */
|
|
|
|
struct module_sect_attrs *sect_attrs;
|
2007-10-17 13:26:40 +07:00
|
|
|
|
|
|
|
/* Notes attributes */
|
|
|
|
struct module_notes_attrs *notes_attrs;
|
2005-04-17 05:20:36 +07:00
|
|
|
#endif
|
|
|
|
|
2011-05-20 05:55:25 +07:00
|
|
|
/* The command line arguments (may be mangled). People like
|
|
|
|
keeping pointers to this stuff */
|
|
|
|
char *args;
|
|
|
|
|
2010-03-10 16:56:10 +07:00
|
|
|
#ifdef CONFIG_SMP
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Per-cpu data. */
|
2010-03-10 16:56:10 +07:00
|
|
|
void __percpu *percpu;
|
|
|
|
unsigned int percpu_size;
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
|
tracing: Kernel Tracepoints
Implementation of kernel tracepoints. Inspired from the Linux Kernel
Markers. Allows complete typing verification by declaring both tracing
statement inline functions and probe registration/unregistration static
inline functions within the same macro "DEFINE_TRACE". No format string
is required. See the tracepoint Documentation and Samples patches for
usage examples.
Taken from the documentation patch :
"A tracepoint placed in code provides a hook to call a function (probe)
that you can provide at runtime. A tracepoint can be "on" (a probe is
connected to it) or "off" (no probe is attached). When a tracepoint is
"off" it has no effect, except for adding a tiny time penalty (checking
a condition for a branch) and space penalty (adding a few bytes for the
function call at the end of the instrumented function and adds a data
structure in a separate section). When a tracepoint is "on", the
function you provide is called each time the tracepoint is executed, in
the execution context of the caller. When the function provided ends its
execution, it returns to the caller (continuing from the tracepoint
site).
You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters, which
prototypes are described in a tracepoint declaration placed in a header
file."
Addition and removal of tracepoints is synchronized by RCU using the
scheduler (and preempt_disable) as guarantees to find a quiescent state
(this is really RCU "classic"). The update side uses rcu_barrier_sched()
with call_rcu_sched() and the read/execute side uses
"preempt_disable()/preempt_enable()".
We make sure the previous array containing probes, which has been
scheduled for deletion by the rcu callback, is indeed freed before we
proceed to the next update. It therefore limits the rate of modification
of a single tracepoint to one update per RCU period. The objective here
is to permit fast batch add/removal of probes on _different_
tracepoints.
Changelog :
- Use #name ":" #proto as string to identify the tracepoint in the
tracepoint table. This will make sure no type mismatch happens due to
connection of a probe with the wrong type to a tracepoint declared with
the same name in a different header.
- Add tracepoint_entry_free_old.
- Change __TO_TRACE to get rid of the 'i' iterator.
Masami Hiramatsu <mhiramat@redhat.com> :
Tested on x86-64.
Performance impact of a tracepoint : same as markers, except that it
adds about 70 bytes of instructions in an unlikely branch of each
instrumented function (the for loop, the stack setup and the function
call). It currently adds a memory read, a test and a conditional branch
at the instrumentation site (in the hot path). Immediate values will
eventually change this into a load immediate, test and branch, which
removes the memory read which will make the i-cache impact smaller
(changing the memory read for a load immediate removes 3-4 bytes per
site on x86_32 (depending on mov prefixes), or 7-8 bytes on x86_64, it
also saves the d-cache hit).
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added.
Quoting Hideo Aoki about Markers :
I evaluated overhead of kernel marker using linux-2.6-sched-fixes git
tree, which includes several markers for LTTng, using an ia64 server.
While the immediate trace mark feature isn't implemented on ia64, there
is no major performance regression. So, I think that we don't have any
issues to propose merging marker point patches into Linus's tree from
the viewpoint of performance impact.
I prepared two kernels to evaluate. The first one was compiled without
CONFIG_MARKERS. The second one was enabled CONFIG_MARKERS.
I downloaded the original hackbench from the following URL:
http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c
I ran hackbench 5 times in each condition and calculated the average and
difference between the kernels.
The parameter of hackbench: every 50 from 50 to 800
The number of CPUs of the server: 2, 4, and 8
Below are the results. As you can see, no major performance regression
was found in any case. Even if the number of processes increases, the
differences between the marker-enabled kernel and the marker-disabled kernel
don't increase. Moreover, if the number of CPUs increases, the differences
don't increase either.
Curiously, marker-enabled kernel is better than marker-disabled kernel
in more than half cases, although I guess it comes from the difference
of memory access pattern.
* 2 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 4.811 | 4.872 | +0.061 | +1.27 |
100 | 9.854 | 10.309 | +0.454 | +4.61 |
150 | 15.602 | 15.040 | -0.562 | -3.6 |
200 | 20.489 | 20.380 | -0.109 | -0.53 |
250 | 25.798 | 25.652 | -0.146 | -0.56 |
300 | 31.260 | 30.797 | -0.463 | -1.48 |
350 | 36.121 | 35.770 | -0.351 | -0.97 |
400 | 42.288 | 42.102 | -0.186 | -0.44 |
450 | 47.778 | 47.253 | -0.526 | -1.1 |
500 | 51.953 | 52.278 | +0.325 | +0.63 |
550 | 58.401 | 57.700 | -0.701 | -1.2 |
600 | 63.334 | 63.222 | -0.112 | -0.18 |
650 | 68.816 | 68.511 | -0.306 | -0.44 |
700 | 74.667 | 74.088 | -0.579 | -0.78 |
750 | 78.612 | 79.582 | +0.970 | +1.23 |
800 | 85.431 | 85.263 | -0.168 | -0.2 |
--------------------------------------------------------------
* 4 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.586 | 2.584 | -0.003 | -0.1 |
100 | 5.254 | 5.283 | +0.030 | +0.56 |
150 | 8.012 | 8.074 | +0.061 | +0.76 |
200 | 11.172 | 11.000 | -0.172 | -1.54 |
250 | 13.917 | 14.036 | +0.119 | +0.86 |
300 | 16.905 | 16.543 | -0.362 | -2.14 |
350 | 19.901 | 20.036 | +0.135 | +0.68 |
400 | 22.908 | 23.094 | +0.186 | +0.81 |
450 | 26.273 | 26.101 | -0.172 | -0.66 |
500 | 29.554 | 29.092 | -0.461 | -1.56 |
550 | 32.377 | 32.274 | -0.103 | -0.32 |
600 | 35.855 | 35.322 | -0.533 | -1.49 |
650 | 39.192 | 38.388 | -0.804 | -2.05 |
700 | 41.744 | 41.719 | -0.025 | -0.06 |
750 | 45.016 | 44.496 | -0.520 | -1.16 |
800 | 48.212 | 47.603 | -0.609 | -1.26 |
--------------------------------------------------------------
* 8 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.094 | 2.072 | -0.022 | -1.07 |
100 | 4.162 | 4.273 | +0.111 | +2.66 |
150 | 6.485 | 6.540 | +0.055 | +0.84 |
200 | 8.556 | 8.478 | -0.078 | -0.91 |
250 | 10.458 | 10.258 | -0.200 | -1.91 |
300 | 12.425 | 12.750 | +0.325 | +2.62 |
350 | 14.807 | 14.839 | +0.032 | +0.22 |
400 | 16.801 | 16.959 | +0.158 | +0.94 |
450 | 19.478 | 19.009 | -0.470 | -2.41 |
500 | 21.296 | 21.504 | +0.208 | +0.98 |
550 | 23.842 | 23.979 | +0.137 | +0.57 |
600 | 26.309 | 26.111 | -0.198 | -0.75 |
650 | 28.705 | 28.446 | -0.259 | -0.9 |
700 | 31.233 | 31.394 | +0.161 | +0.52 |
750 | 34.064 | 33.720 | -0.344 | -1.01 |
800 | 36.320 | 36.114 | -0.206 | -0.57 |
--------------------------------------------------------------
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-18 23:16:16 +07:00
|
|
|
#ifdef CONFIG_TRACEPOINTS
|
|
|
|
unsigned int num_tracepoints;
|
2011-05-20 05:55:25 +07:00
|
|
|
struct tracepoint * const *tracepoints_ptrs;
|
tracing: Kernel Tracepoints
Implementation of kernel tracepoints. Inspired from the Linux Kernel
Markers. Allows complete typing verification by declaring both tracing
statement inline functions and probe registration/unregistration static
inline functions within the same macro "DEFINE_TRACE". No format string
is required. See the tracepoint Documentation and Samples patches for
usage examples.
Taken from the documentation patch :
"A tracepoint placed in code provides a hook to call a function (probe)
that you can provide at runtime. A tracepoint can be "on" (a probe is
connected to it) or "off" (no probe is attached). When a tracepoint is
"off" it has no effect, except for adding a tiny time penalty (checking
a condition for a branch) and space penalty (adding a few bytes for the
function call at the end of the instrumented function and adds a data
structure in a separate section). When a tracepoint is "on", the
function you provide is called each time the tracepoint is executed, in
the execution context of the caller. When the function provided ends its
execution, it returns to the caller (continuing from the tracepoint
site).
You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters, which
prototypes are described in a tracepoint declaration placed in a header
file."
Addition and removal of tracepoints is synchronized by RCU using the
scheduler (and preempt_disable) as guarantees to find a quiescent state
(this is really RCU "classic"). The update side uses rcu_barrier_sched()
with call_rcu_sched() and the read/execute side uses
"preempt_disable()/preempt_enable()".
We make sure the previous array containing probes, which has been
scheduled for deletion by the rcu callback, is indeed freed before we
proceed to the next update. It therefore limits the rate of modification
of a single tracepoint to one update per RCU period. The objective here
is to permit fast batch add/removal of probes on _different_
tracepoints.
Changelog :
- Use #name ":" #proto as string to identify the tracepoint in the
tracepoint table. This will make sure no type mismatch happens due to
connection of a probe with the wrong type to a tracepoint declared with
the same name in a different header.
- Add tracepoint_entry_free_old.
- Change __TO_TRACE to get rid of the 'i' iterator.
Masami Hiramatsu <mhiramat@redhat.com> :
Tested on x86-64.
Performance impact of a tracepoint : same as markers, except that it
adds about 70 bytes of instructions in an unlikely branch of each
instrumented function (the for loop, the stack setup and the function
call). It currently adds a memory read, a test and a conditional branch
at the instrumentation site (in the hot path). Immediate values will
eventually change this into a load immediate, test and branch, which
removes the memory read which will make the i-cache impact smaller
(changing the memory read for a load immediate removes 3-4 bytes per
site on x86_32 (depending on mov prefixes), or 7-8 bytes on x86_64, it
also saves the d-cache hit).
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added.
Quoting Hideo Aoki about Markers :
I evaluated overhead of kernel marker using linux-2.6-sched-fixes git
tree, which includes several markers for LTTng, using an ia64 server.
While the immediate trace mark feature isn't implemented on ia64, there
is no major performance regression. So, I think that we don't have any
issues to propose merging marker point patches into Linus's tree from
the viewpoint of performance impact.
I prepared two kernels to evaluate. The first one was compiled without
CONFIG_MARKERS. The second one was enabled CONFIG_MARKERS.
I downloaded the original hackbench from the following URL:
http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c
I ran hackbench 5 times in each condition and calculated the average and
difference between the kernels.
The parameter of hackbench: every 50 from 50 to 800
The number of CPUs of the server: 2, 4, and 8
Below are the results. As you can see, no major performance regression
was found in any case. Even if the number of processes increases, the
differences between the marker-enabled kernel and the marker-disabled kernel
don't increase. Moreover, if the number of CPUs increases, the differences
don't increase either.
Curiously, marker-enabled kernel is better than marker-disabled kernel
in more than half cases, although I guess it comes from the difference
of memory access pattern.
* 2 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 4.811 | 4.872 | +0.061 | +1.27 |
100 | 9.854 | 10.309 | +0.454 | +4.61 |
150 | 15.602 | 15.040 | -0.562 | -3.6 |
200 | 20.489 | 20.380 | -0.109 | -0.53 |
250 | 25.798 | 25.652 | -0.146 | -0.56 |
300 | 31.260 | 30.797 | -0.463 | -1.48 |
350 | 36.121 | 35.770 | -0.351 | -0.97 |
400 | 42.288 | 42.102 | -0.186 | -0.44 |
450 | 47.778 | 47.253 | -0.526 | -1.1 |
500 | 51.953 | 52.278 | +0.325 | +0.63 |
550 | 58.401 | 57.700 | -0.701 | -1.2 |
600 | 63.334 | 63.222 | -0.112 | -0.18 |
650 | 68.816 | 68.511 | -0.306 | -0.44 |
700 | 74.667 | 74.088 | -0.579 | -0.78 |
750 | 78.612 | 79.582 | +0.970 | +1.23 |
800 | 85.431 | 85.263 | -0.168 | -0.2 |
--------------------------------------------------------------
* 4 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.586 | 2.584 | -0.003 | -0.1 |
100 | 5.254 | 5.283 | +0.030 | +0.56 |
150 | 8.012 | 8.074 | +0.061 | +0.76 |
200 | 11.172 | 11.000 | -0.172 | -1.54 |
250 | 13.917 | 14.036 | +0.119 | +0.86 |
300 | 16.905 | 16.543 | -0.362 | -2.14 |
350 | 19.901 | 20.036 | +0.135 | +0.68 |
400 | 22.908 | 23.094 | +0.186 | +0.81 |
450 | 26.273 | 26.101 | -0.172 | -0.66 |
500 | 29.554 | 29.092 | -0.461 | -1.56 |
550 | 32.377 | 32.274 | -0.103 | -0.32 |
600 | 35.855 | 35.322 | -0.533 | -1.49 |
650 | 39.192 | 38.388 | -0.804 | -2.05 |
700 | 41.744 | 41.719 | -0.025 | -0.06 |
750 | 45.016 | 44.496 | -0.520 | -1.16 |
800 | 48.212 | 47.603 | -0.609 | -1.26 |
--------------------------------------------------------------
* 8 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.094 | 2.072 | -0.022 | -1.07 |
100 | 4.162 | 4.273 | +0.111 | +2.66 |
150 | 6.485 | 6.540 | +0.055 | +0.84 |
200 | 8.556 | 8.478 | -0.078 | -0.91 |
250 | 10.458 | 10.258 | -0.200 | -1.91 |
300 | 12.425 | 12.750 | +0.325 | +2.62 |
350 | 14.807 | 14.839 | +0.032 | +0.22 |
400 | 16.801 | 16.959 | +0.158 | +0.94 |
450 | 19.478 | 19.009 | -0.470 | -2.41 |
500 | 21.296 | 21.504 | +0.208 | +0.98 |
550 | 23.842 | 23.979 | +0.137 | +0.57 |
600 | 26.309 | 26.111 | -0.198 | -0.75 |
650 | 28.705 | 28.446 | -0.259 | -0.9 |
700 | 31.233 | 31.394 | +0.161 | +0.52 |
750 | 34.064 | 33.720 | -0.344 | -1.01 |
800 | 36.320 | 36.114 | -0.206 | -0.57 |
--------------------------------------------------------------
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-18 23:16:16 +07:00
|
|
|
#endif
|
2010-09-17 22:09:00 +07:00
|
|
|
#ifdef HAVE_JUMP_LABEL
|
|
|
|
struct jump_entry *jump_entries;
|
|
|
|
unsigned int num_jump_entries;
|
|
|
|
#endif
|
2009-03-06 23:21:49 +07:00
|
|
|
#ifdef CONFIG_TRACING
|
2009-03-06 23:21:48 +07:00
|
|
|
unsigned int num_trace_bprintk_fmt;
|
2011-05-20 05:55:25 +07:00
|
|
|
const char **trace_bprintk_fmt_start;
|
2009-03-06 23:21:48 +07:00
|
|
|
#endif
|
2009-04-11 01:53:50 +07:00
|
|
|
#ifdef CONFIG_EVENT_TRACING
|
tracing: Replace trace_event struct array with pointer array
Currently the trace_event structures are placed in the _ftrace_events
section, and at link time, the linker makes one large array of all
the trace_event structures. On boot up, this array is read (much like
the initcall sections) and the events are processed.
The problem is that there is no guarantee that gcc will place complex
structures nicely together in an array format. Two structures in the
same file may be placed awkwardly, because gcc has no clue that they
are supposed to be in an array.
A hack was used previously to force the alignment to 4, to pack the
structures together. But this caused alignment issues with other
architectures (sparc).
Instead of packing the structures into an array, the structures' addresses
are now put into the _ftrace_event section. As pointers are always the
natural alignment, gcc should always pack them tightly together
(otherwise initcall, extable, etc would also fail).
By having the pointers to the structures in the section, we can still
iterate the trace_events without causing unnecessary alignment problems
with other architectures, or depending on the current behaviour of
gcc that will likely change in the future just to tick us kernel developers
off a little more.
The _ftrace_event section is also moved into the .init.data section
as it is now only needed at boot up.
Suggested-by: David Miller <davem@davemloft.net>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2011-01-27 21:15:30 +07:00
|
|
|
struct ftrace_event_call **trace_events;
|
2009-04-11 01:53:50 +07:00
|
|
|
unsigned int num_trace_events;
|
|
|
|
#endif
|
2009-04-16 00:24:06 +07:00
|
|
|
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
|
|
|
|
unsigned int num_ftrace_callsites;
|
2011-05-20 05:55:25 +07:00
|
|
|
unsigned long *ftrace_callsites;
|
2009-04-16 00:24:06 +07:00
|
|
|
#endif
|
2009-03-06 23:21:48 +07:00
|
|
|
|
2008-07-23 07:24:26 +07:00
|
|
|
#ifdef CONFIG_MODULE_UNLOAD
|
|
|
|
/* What modules depend on me? */
|
2010-06-01 02:19:37 +07:00
|
|
|
struct list_head source_list;
|
|
|
|
/* What modules do I depend on? */
|
|
|
|
struct list_head target_list;
|
2008-07-23 07:24:26 +07:00
|
|
|
|
|
|
|
/* Who is waiting for us to be unloaded */
|
|
|
|
struct task_struct *waiter;
|
|
|
|
|
|
|
|
/* Destruction function. */
|
|
|
|
void (*exit)(void);
|
|
|
|
|
2010-01-05 13:34:50 +07:00
|
|
|
struct module_ref {
|
2010-04-01 15:09:40 +07:00
|
|
|
unsigned int incs;
|
|
|
|
unsigned int decs;
|
2010-02-02 12:38:57 +07:00
|
|
|
} __percpu *refptr;
|
2008-07-23 07:24:26 +07:00
|
|
|
#endif
|
2009-06-18 06:28:03 +07:00
|
|
|
|
|
|
|
#ifdef CONFIG_CONSTRUCTORS
|
|
|
|
/* Constructor functions. */
|
|
|
|
ctor_fn_t *ctors;
|
|
|
|
unsigned int num_ctors;
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
2007-05-09 16:35:15 +07:00
|
|
|
#ifndef MODULE_ARCH_INIT
|
|
|
|
#define MODULE_ARCH_INIT {}
|
|
|
|
#endif
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2008-12-06 07:03:59 +07:00
|
|
|
extern struct mutex module_mutex;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* FIXME: It'd be nice to isolate modules during init, too, so they
|
|
|
|
aren't used before they (may) fail. But presently too much code
|
|
|
|
(IDE & SCSI) require entry into the module during init.*/
|
|
|
|
static inline int module_is_live(struct module *mod)
|
|
|
|
{
|
|
|
|
return mod->state != MODULE_STATE_GOING;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct module *__module_text_address(unsigned long addr);
|
2009-04-01 02:05:31 +07:00
|
|
|
struct module *__module_address(unsigned long addr);
|
|
|
|
bool is_module_address(unsigned long addr);
|
2010-03-10 16:57:54 +07:00
|
|
|
bool is_module_percpu_address(unsigned long addr);
|
2009-04-01 02:05:31 +07:00
|
|
|
bool is_module_text_address(unsigned long addr);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2009-01-07 05:41:49 +07:00
|
|
|
static inline int within_module_core(unsigned long addr, struct module *mod)
|
|
|
|
{
|
|
|
|
return (unsigned long)mod->module_core <= addr &&
|
|
|
|
addr < (unsigned long)mod->module_core + mod->core_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int within_module_init(unsigned long addr, struct module *mod)
|
|
|
|
{
|
|
|
|
return (unsigned long)mod->module_init <= addr &&
|
|
|
|
addr < (unsigned long)mod->module_init + mod->init_size;
|
|
|
|
}
|
|
|
|
|
2008-12-06 07:03:59 +07:00
|
|
|
/* Search for module by name: must hold module_mutex. */
|
|
|
|
struct module *find_module(const char *name);
|
|
|
|
|
|
|
|
/*
 * Descriptor handed to the each_symbol_section() callback.
 * NOTE(review): the per-field notes below are inferred from the field
 * names only -- confirm against the users of this struct.
 */
struct symsearch {
	/* presumably the [start, stop) bounds of a kernel_symbol array */
	const struct kernel_symbol *start;
	const struct kernel_symbol *stop;
	/* presumably CRC values matching the symbols above -- TODO confirm */
	const unsigned long *crcs;
	/* licence class attached to this run of symbols */
	enum {
		NOT_GPL_ONLY,
		GPL_ONLY,
		WILL_BE_GPL_ONLY,
	} licence;
	/* NOTE(review): looks like a flag for "unused" exports -- confirm */
	bool unused;
};
|
|
|
|
|
|
|
|
/* Search for an exported symbol by name. */
|
|
|
|
const struct kernel_symbol *find_symbol(const char *name,
|
|
|
|
struct module **owner,
|
|
|
|
const unsigned long **crc,
|
|
|
|
bool gplok,
|
|
|
|
bool warn);
|
|
|
|
|
|
|
|
/* Walk the exported symbol table */
|
2011-04-20 02:49:58 +07:00
|
|
|
bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
|
|
|
|
struct module *owner,
|
|
|
|
void *data), void *data);
|
2008-12-06 07:03:59 +07:00
|
|
|
|
2007-05-08 14:28:39 +07:00
|
|
|
/* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
|
2005-04-17 05:20:36 +07:00
|
|
|
symnum out of range. */
|
2007-05-08 14:28:39 +07:00
|
|
|
int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
|
|
|
|
char *name, char *module_name, int *exported);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Look for this name: can be of form module:name. */
|
|
|
|
unsigned long module_kallsyms_lookup_name(const char *name);
|
|
|
|
|
2008-12-06 07:03:58 +07:00
|
|
|
int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
|
|
|
|
struct module *, unsigned long),
|
|
|
|
void *data);
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
extern void __module_put_and_exit(struct module *mod, long code)
|
|
|
|
__attribute__((noreturn));
|
|
|
|
/*
 * Convenience wrapper passing THIS_MODULE to __module_put_and_exit().
 * The expansion deliberately carries no trailing semicolon: the caller
 * supplies it, so the macro behaves like one statement and is safe in an
 * unbraced if/else.
 */
#define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code)
|
|
|
|
|
|
|
|
#ifdef CONFIG_MODULE_UNLOAD
|
|
|
|
unsigned int module_refcount(struct module *mod);
|
|
|
|
void __symbol_put(const char *symbol);
|
|
|
|
#define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x)
|
|
|
|
void symbol_put_addr(void *addr);
|
|
|
|
|
|
|
|
/* Sometimes we know we already have a refcount, and it's easier not
|
|
|
|
to handle the error case (which only happens with rmmod --wait). */
|
|
|
|
static inline void __module_get(struct module *module)
|
|
|
|
{
|
|
|
|
if (module) {
|
2010-01-05 13:34:50 +07:00
|
|
|
preempt_disable();
|
2010-04-01 15:09:40 +07:00
|
|
|
__this_cpu_inc(module->refptr->incs);
|
2010-03-24 09:57:43 +07:00
|
|
|
trace_module_get(module, _THIS_IP_);
|
2010-01-05 13:34:50 +07:00
|
|
|
preempt_enable();
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int try_module_get(struct module *module)
|
|
|
|
{
|
|
|
|
int ret = 1;
|
|
|
|
|
|
|
|
if (module) {
|
2010-01-05 13:34:50 +07:00
|
|
|
preempt_disable();
|
|
|
|
|
2009-08-17 15:56:28 +07:00
|
|
|
if (likely(module_is_live(module))) {
|
2010-04-01 15:09:40 +07:00
|
|
|
__this_cpu_inc(module->refptr->incs);
|
2010-03-24 09:57:43 +07:00
|
|
|
trace_module_get(module, _THIS_IP_);
|
2010-04-01 15:09:40 +07:00
|
|
|
} else
|
2005-04-17 05:20:36 +07:00
|
|
|
ret = 0;
|
2010-01-05 13:34:50 +07:00
|
|
|
|
|
|
|
preempt_enable();
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2006-10-18 12:47:25 +07:00
|
|
|
extern void module_put(struct module *module);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#else /*!CONFIG_MODULE_UNLOAD*/
|
|
|
|
/*
 * Variant used when CONFIG_MODULE_UNLOAD is off: no counter is touched.
 * Returns 1 for a NULL module or one that module_is_live() accepts,
 * 0 otherwise.
 */
static inline int try_module_get(struct module *module)
{
	if (!module)
		return 1;

	return module_is_live(module) != 0;
}
|
|
|
|
static inline void module_put(struct module *module)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
static inline void __module_get(struct module *module)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
#define symbol_put(x) do { } while(0)
|
|
|
|
#define symbol_put_addr(p) do { } while(0)
|
|
|
|
|
|
|
|
#endif /* CONFIG_MODULE_UNLOAD */
|
2010-11-25 04:21:10 +07:00
|
|
|
int ref_module(struct module *a, struct module *b);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* This is a #define so the string doesn't get put in every .o file */
|
|
|
|
#define module_name(mod) \
|
|
|
|
({ \
|
|
|
|
struct module *__mod = (mod); \
|
|
|
|
__mod ? __mod->name : "kernel"; \
|
|
|
|
})
|
|
|
|
|
2008-01-30 05:13:22 +07:00
|
|
|
/* For kallsyms to ask for address resolution. namebuf should be at
|
|
|
|
* least KSYM_NAME_LEN long: a pointer to namebuf is returned if
|
|
|
|
* found, otherwise NULL. */
|
2008-02-08 19:18:43 +07:00
|
|
|
const char *module_address_lookup(unsigned long addr,
|
2008-01-30 05:13:22 +07:00
|
|
|
unsigned long *symbolsize,
|
|
|
|
unsigned long *offset,
|
|
|
|
char **modname,
|
|
|
|
char *namebuf);
|
2007-05-08 14:28:43 +07:00
|
|
|
int lookup_module_symbol_name(unsigned long addr, char *symname);
|
2007-05-08 14:28:47 +07:00
|
|
|
int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* For extable.c to search modules' exception tables. */
|
|
|
|
const struct exception_table_entry *search_module_extables(unsigned long addr);
|
|
|
|
|
|
|
|
int register_module_notifier(struct notifier_block * nb);
|
|
|
|
int unregister_module_notifier(struct notifier_block * nb);
|
|
|
|
|
|
|
|
extern void print_modules(void);
|
|
|
|
|
tracing: Kernel Tracepoints
Implementation of kernel tracepoints. Inspired from the Linux Kernel
Markers. Allows complete typing verification by declaring both tracing
statement inline functions and probe registration/unregistration static
inline functions within the same macro "DEFINE_TRACE". No format string
is required. See the tracepoint Documentation and Samples patches for
usage examples.
Taken from the documentation patch :
"A tracepoint placed in code provides a hook to call a function (probe)
that you can provide at runtime. A tracepoint can be "on" (a probe is
connected to it) or "off" (no probe is attached). When a tracepoint is
"off" it has no effect, except for adding a tiny time penalty (checking
a condition for a branch) and space penalty (adding a few bytes for the
function call at the end of the instrumented function and adds a data
structure in a separate section). When a tracepoint is "on", the
function you provide is called each time the tracepoint is executed, in
the execution context of the caller. When the function provided ends its
execution, it returns to the caller (continuing from the tracepoint
site).
You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters, which
prototypes are described in a tracepoint declaration placed in a header
file."
Addition and removal of tracepoints is synchronized by RCU using the
scheduler (and preempt_disable) as guarantees to find a quiescent state
(this is really RCU "classic"). The update side uses rcu_barrier_sched()
with call_rcu_sched() and the read/execute side uses
"preempt_disable()/preempt_enable()".
We make sure the previous array containing probes, which has been
scheduled for deletion by the rcu callback, is indeed freed before we
proceed to the next update. It therefore limits the rate of modification
of a single tracepoint to one update per RCU period. The objective here
is to permit fast batch add/removal of probes on _different_
tracepoints.
Changelog :
- Use #name ":" #proto as string to identify the tracepoint in the
tracepoint table. This will make sure no type mismatch happens due to
connection of a probe with the wrong type to a tracepoint declared with
the same name in a different header.
- Add tracepoint_entry_free_old.
- Change __TO_TRACE to get rid of the 'i' iterator.
Masami Hiramatsu <mhiramat@redhat.com> :
Tested on x86-64.
Performance impact of a tracepoint : same as markers, except that it
adds about 70 bytes of instructions in an unlikely branch of each
instrumented function (the for loop, the stack setup and the function
call). It currently adds a memory read, a test and a conditional branch
at the instrumentation site (in the hot path). Immediate values will
eventually change this into a load immediate, test and branch, which
removes the memory read which will make the i-cache impact smaller
(changing the memory read for a load immediate removes 3-4 bytes per
site on x86_32 (depending on mov prefixes), or 7-8 bytes on x86_64, it
also saves the d-cache hit).
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in code
scheduler code) was added.
Quoting Hideo Aoki about Markers :
I evaluated overhead of kernel marker using linux-2.6-sched-fixes git
tree, which includes several markers for LTTng, using an ia64 server.
While the immediate trace mark feature isn't implemented on ia64, there
is no major performance regression. So, I think that we don't have any
issues to propose merging marker point patches into Linus's tree from
the viewpoint of performance impact.
I prepared two kernels to evaluate. The first one was compiled without
CONFIG_MARKERS. The second one was enabled CONFIG_MARKERS.
I downloaded the original hackbench from the following URL:
http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c
I ran hackbench 5 times in each condition and calculated the average and
difference between the kernels.
The parameter of hackbench: every 50 from 50 to 800
The number of CPUs of the server: 2, 4, and 8
Below are the results. As you can see, no major performance regression
was found in any case. Even if the number of processes increases, the
differences between the marker-enabled kernel and the marker-disabled kernel
don't increase. Moreover, if the number of CPUs increases, the differences
don't increase either.
Curiously, marker-enabled kernel is better than marker-disabled kernel
in more than half cases, although I guess it comes from the difference
of memory access pattern.
* 2 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 4.811 | 4.872 | +0.061 | +1.27 |
100 | 9.854 | 10.309 | +0.454 | +4.61 |
150 | 15.602 | 15.040 | -0.562 | -3.6 |
200 | 20.489 | 20.380 | -0.109 | -0.53 |
250 | 25.798 | 25.652 | -0.146 | -0.56 |
300 | 31.260 | 30.797 | -0.463 | -1.48 |
350 | 36.121 | 35.770 | -0.351 | -0.97 |
400 | 42.288 | 42.102 | -0.186 | -0.44 |
450 | 47.778 | 47.253 | -0.526 | -1.1 |
500 | 51.953 | 52.278 | +0.325 | +0.63 |
550 | 58.401 | 57.700 | -0.701 | -1.2 |
600 | 63.334 | 63.222 | -0.112 | -0.18 |
650 | 68.816 | 68.511 | -0.306 | -0.44 |
700 | 74.667 | 74.088 | -0.579 | -0.78 |
750 | 78.612 | 79.582 | +0.970 | +1.23 |
800 | 85.431 | 85.263 | -0.168 | -0.2 |
--------------------------------------------------------------
* 4 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.586 | 2.584 | -0.003 | -0.1 |
100 | 5.254 | 5.283 | +0.030 | +0.56 |
150 | 8.012 | 8.074 | +0.061 | +0.76 |
200 | 11.172 | 11.000 | -0.172 | -1.54 |
250 | 13.917 | 14.036 | +0.119 | +0.86 |
300 | 16.905 | 16.543 | -0.362 | -2.14 |
350 | 19.901 | 20.036 | +0.135 | +0.68 |
400 | 22.908 | 23.094 | +0.186 | +0.81 |
450 | 26.273 | 26.101 | -0.172 | -0.66 |
500 | 29.554 | 29.092 | -0.461 | -1.56 |
550 | 32.377 | 32.274 | -0.103 | -0.32 |
600 | 35.855 | 35.322 | -0.533 | -1.49 |
650 | 39.192 | 38.388 | -0.804 | -2.05 |
700 | 41.744 | 41.719 | -0.025 | -0.06 |
750 | 45.016 | 44.496 | -0.520 | -1.16 |
800 | 48.212 | 47.603 | -0.609 | -1.26 |
--------------------------------------------------------------
* 8 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.094 | 2.072 | -0.022 | -1.07 |
100 | 4.162 | 4.273 | +0.111 | +2.66 |
150 | 6.485 | 6.540 | +0.055 | +0.84 |
200 | 8.556 | 8.478 | -0.078 | -0.91 |
250 | 10.458 | 10.258 | -0.200 | -1.91 |
300 | 12.425 | 12.750 | +0.325 | +2.62 |
350 | 14.807 | 14.839 | +0.032 | +0.22 |
400 | 16.801 | 16.959 | +0.158 | +0.94 |
450 | 19.478 | 19.009 | -0.470 | -2.41 |
500 | 21.296 | 21.504 | +0.208 | +0.98 |
550 | 23.842 | 23.979 | +0.137 | +0.57 |
600 | 26.309 | 26.111 | -0.198 | -0.75 |
650 | 28.705 | 28.446 | -0.259 | -0.9 |
700 | 31.233 | 31.394 | +0.161 | +0.52 |
750 | 34.064 | 33.720 | -0.344 | -1.01 |
800 | 36.320 | 36.114 | -0.206 | -0.57 |
--------------------------------------------------------------
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-18 23:16:16 +07:00
|
|
|
extern void module_update_tracepoints(void);
|
|
|
|
extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#else /* !CONFIG_MODULES... */
|
|
|
|
/*
 * !CONFIG_MODULES branch: every export annotation takes one argument and
 * expands to nothing.
 */
#define EXPORT_SYMBOL(sym)
#define EXPORT_SYMBOL_GPL(sym)
#define EXPORT_SYMBOL_GPL_FUTURE(sym)
#define EXPORT_UNUSED_SYMBOL(sym)
#define EXPORT_UNUSED_SYMBOL_GPL(sym)
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
 * Given an address, look for it in the exception tables.
 *
 * !CONFIG_MODULES stub: @addr is ignored and the result is always NULL.
 */
static inline const struct exception_table_entry *
search_module_extables(unsigned long addr)
{
	return NULL;
}
|
|
|
|
|
2009-04-01 02:05:31 +07:00
|
|
|
/*
 * !CONFIG_MODULES stub: @addr is ignored and no module is ever reported,
 * i.e. the result is always NULL.
 */
static inline struct module *__module_address(unsigned long addr)
{
	return NULL;
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * !CONFIG_MODULES stub: @addr is ignored and the result is always NULL.
 */
static inline struct module *__module_text_address(unsigned long addr)
{
	return NULL;
}
|
|
|
|
|
2009-04-01 02:05:31 +07:00
|
|
|
/*
 * !CONFIG_MODULES stub: @addr is ignored and the answer is always false.
 */
static inline bool is_module_address(unsigned long addr)
{
	return false;
}
|
|
|
|
|
2010-03-31 09:33:42 +07:00
|
|
|
/*
 * !CONFIG_MODULES stub: @addr is ignored and the answer is always false.
 */
static inline bool is_module_percpu_address(unsigned long addr)
{
	return false;
}
|
|
|
|
|
2009-04-01 02:05:31 +07:00
|
|
|
/*
 * !CONFIG_MODULES stub: @addr is ignored and the answer is always false.
 */
static inline bool is_module_text_address(unsigned long addr)
{
	return false;
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * Get/put a kernel symbol (calls should be symmetric).
 *
 * In this !CONFIG_MODULES branch symbol_get() re-declares x as a weak
 * extern and evaluates to its address; both put variants ignore their
 * argument and expand to an empty statement.
 */
#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); })
#define symbol_put(x) do { } while (0)
#define symbol_put_addr(x) do { } while (0)
|
|
|
|
|
|
|
|
/* !CONFIG_MODULES stub: @module is ignored; nothing is done. */
static inline void __module_get(struct module *module)
{
}
|
|
|
|
|
|
|
|
/*
 * !CONFIG_MODULES stub: @module is ignored and the call always returns 1.
 */
static inline int try_module_get(struct module *module)
{
	return 1;
}
|
|
|
|
|
|
|
|
/* !CONFIG_MODULES stub: @module is ignored; nothing is done. */
static inline void module_put(struct module *module)
{
}
|
|
|
|
|
|
|
|
#define module_name(mod) "kernel"
|
|
|
|
|
|
|
|
/*
 * For kallsyms to ask for address resolution.  NULL means not found.
 *
 * !CONFIG_MODULES stub: every argument is ignored, none of the output
 * pointers is written, and the result is always NULL.
 */
static inline const char *module_address_lookup(unsigned long addr,
						unsigned long *symbolsize,
						unsigned long *offset,
						char **modname,
						char *namebuf)
{
	return NULL;
}
|
|
|
|
|
2007-05-08 14:28:43 +07:00
|
|
|
/*
 * !CONFIG_MODULES stub: both arguments are ignored, @symname is not
 * written, and the call always fails with -ERANGE.
 */
static inline int lookup_module_symbol_name(unsigned long addr, char *symname)
{
	return -ERANGE;
}
|
|
|
|
|
2007-05-08 14:28:47 +07:00
|
|
|
/*
 * !CONFIG_MODULES stub: all arguments are ignored, no output is written,
 * and the call always fails with -ERANGE.
 */
static inline int lookup_module_symbol_attrs(unsigned long addr,
					     unsigned long *size,
					     unsigned long *offset,
					     char *modname, char *name)
{
	return -ERANGE;
}
|
|
|
|
|
2007-05-08 14:28:39 +07:00
|
|
|
/*
 * !CONFIG_MODULES stub: all arguments are ignored, no output is written,
 * and the call always fails with -ERANGE.
 *
 * The parameter called module_name does not collide with the function-like
 * module_name() macro defined earlier in this branch: a function-like macro
 * is only expanded when its name is followed by '('.
 */
static inline int module_get_kallsym(unsigned int symnum, unsigned long *value,
				     char *type, char *name,
				     char *module_name, int *exported)
{
	return -ERANGE;
}
|
|
|
|
|
|
|
|
/*
 * !CONFIG_MODULES stub: @name is ignored and the result is always 0.
 */
static inline unsigned long module_kallsyms_lookup_name(const char *name)
{
	return 0;
}
|
|
|
|
|
2008-12-06 07:03:58 +07:00
|
|
|
/*
 * !CONFIG_MODULES stub: @fn is never invoked, @data is ignored, and the
 * result is always 0.
 */
static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
							    struct module *,
							    unsigned long),
						 void *data)
{
	return 0;
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * !CONFIG_MODULES stub: @nb is ignored and registration always reports
 * success (0).
 */
static inline int register_module_notifier(struct notifier_block *nb)
{
	/* no events will happen anyway, so this can always succeed */
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * !CONFIG_MODULES stub: @nb is ignored and the call always returns 0.
 */
static inline int unregister_module_notifier(struct notifier_block *nb)
{
	return 0;
}
|
|
|
|
|
|
|
|
/* !CONFIG_MODULES stub: expands to a plain do_exit(code) call. */
#define module_put_and_exit(code) do_exit(code)
|
|
|
|
|
|
|
|
/* !CONFIG_MODULES stub: does nothing. */
static inline void print_modules(void)
{
}
|
|
|
|
|
tracing: Kernel Tracepoints
Implementation of kernel tracepoints. Inspired from the Linux Kernel
Markers. Allows complete typing verification by declaring both tracing
statement inline functions and probe registration/unregistration static
inline functions within the same macro "DEFINE_TRACE". No format string
is required. See the tracepoint Documentation and Samples patches for
usage examples.
Taken from the documentation patch :
"A tracepoint placed in code provides a hook to call a function (probe)
that you can provide at runtime. A tracepoint can be "on" (a probe is
connected to it) or "off" (no probe is attached). When a tracepoint is
"off" it has no effect, except for adding a tiny time penalty (checking
a condition for a branch) and space penalty (adding a few bytes for the
function call at the end of the instrumented function and adds a data
structure in a separate section). When a tracepoint is "on", the
function you provide is called each time the tracepoint is executed, in
the execution context of the caller. When the function provided ends its
execution, it returns to the caller (continuing from the tracepoint
site).
You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters, which
prototypes are described in a tracepoint declaration placed in a header
file."
Addition and removal of tracepoints is synchronized by RCU using the
scheduler (and preempt_disable) as guarantees to find a quiescent state
(this is really RCU "classic"). The update side uses rcu_barrier_sched()
with call_rcu_sched() and the read/execute side uses
"preempt_disable()/preempt_enable()".
We make sure the previous array containing probes, which has been
scheduled for deletion by the rcu callback, is indeed freed before we
proceed to the next update. It therefore limits the rate of modification
of a single tracepoint to one update per RCU period. The objective here
is to permit fast batch add/removal of probes on _different_
tracepoints.
Changelog :
- Use #name ":" #proto as string to identify the tracepoint in the
tracepoint table. This will make sure no type mismatch happens due to
connection of a probe with the wrong type to a tracepoint declared with
the same name in a different header.
- Add tracepoint_entry_free_old.
- Change __TO_TRACE to get rid of the 'i' iterator.
Masami Hiramatsu <mhiramat@redhat.com> :
Tested on x86-64.
Performance impact of a tracepoint : same as markers, except that it
adds about 70 bytes of instructions in an unlikely branch of each
instrumented function (the for loop, the stack setup and the function
call). It currently adds a memory read, a test and a conditional branch
at the instrumentation site (in the hot path). Immediate values will
eventually change this into a load immediate, test and branch, which
removes the memory read which will make the i-cache impact smaller
(changing the memory read for a load immediate removes 3-4 bytes per
site on x86_32 (depending on mov prefixes), or 7-8 bytes on x86_64, it
also saves the d-cache hit).
About the performance impact of tracepoints (which is comparable to
markers), even without immediate values optimizations, tests done by
Hideo Aoki on ia64 show no regression. His test case was using hackbench
on a kernel where scheduler instrumentation (about 5 events in the
scheduler code) was added.
Quoting Hideo Aoki about Markers :
I evaluated overhead of kernel marker using linux-2.6-sched-fixes git
tree, which includes several markers for LTTng, using an ia64 server.
While the immediate trace mark feature isn't implemented on ia64, there
is no major performance regression. So, I think that we don't have any
issues to propose merging marker point patches into Linus's tree from
the viewpoint of performance impact.
I prepared two kernels to evaluate. The first one was compiled without
CONFIG_MARKERS. The second one was compiled with CONFIG_MARKERS enabled.
I downloaded the original hackbench from the following URL:
http://devresources.linux-foundation.org/craiger/hackbench/src/hackbench.c
I ran hackbench 5 times in each condition and calculated the average and
difference between the kernels.
The parameter of hackbench: every 50 from 50 to 800
The number of CPUs of the server: 2, 4, and 8
Below are the results. As you can see, no major performance regression
was found in any case. Even if the number of processes increases, the
differences between the marker-enabled kernel and the marker-disabled
kernel don't increase. Moreover, if the number of CPUs increases, the
differences don't increase either.
Curiously, the marker-enabled kernel is better than the marker-disabled
kernel in more than half of the cases, although I guess it comes from the
difference of memory access pattern.
* 2 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 4.811 | 4.872 | +0.061 | +1.27 |
100 | 9.854 | 10.309 | +0.454 | +4.61 |
150 | 15.602 | 15.040 | -0.562 | -3.6 |
200 | 20.489 | 20.380 | -0.109 | -0.53 |
250 | 25.798 | 25.652 | -0.146 | -0.56 |
300 | 31.260 | 30.797 | -0.463 | -1.48 |
350 | 36.121 | 35.770 | -0.351 | -0.97 |
400 | 42.288 | 42.102 | -0.186 | -0.44 |
450 | 47.778 | 47.253 | -0.526 | -1.1 |
500 | 51.953 | 52.278 | +0.325 | +0.63 |
550 | 58.401 | 57.700 | -0.701 | -1.2 |
600 | 63.334 | 63.222 | -0.112 | -0.18 |
650 | 68.816 | 68.511 | -0.306 | -0.44 |
700 | 74.667 | 74.088 | -0.579 | -0.78 |
750 | 78.612 | 79.582 | +0.970 | +1.23 |
800 | 85.431 | 85.263 | -0.168 | -0.2 |
--------------------------------------------------------------
* 4 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.586 | 2.584 | -0.003 | -0.1 |
100 | 5.254 | 5.283 | +0.030 | +0.56 |
150 | 8.012 | 8.074 | +0.061 | +0.76 |
200 | 11.172 | 11.000 | -0.172 | -1.54 |
250 | 13.917 | 14.036 | +0.119 | +0.86 |
300 | 16.905 | 16.543 | -0.362 | -2.14 |
350 | 19.901 | 20.036 | +0.135 | +0.68 |
400 | 22.908 | 23.094 | +0.186 | +0.81 |
450 | 26.273 | 26.101 | -0.172 | -0.66 |
500 | 29.554 | 29.092 | -0.461 | -1.56 |
550 | 32.377 | 32.274 | -0.103 | -0.32 |
600 | 35.855 | 35.322 | -0.533 | -1.49 |
650 | 39.192 | 38.388 | -0.804 | -2.05 |
700 | 41.744 | 41.719 | -0.025 | -0.06 |
750 | 45.016 | 44.496 | -0.520 | -1.16 |
800 | 48.212 | 47.603 | -0.609 | -1.26 |
--------------------------------------------------------------
* 8 CPUs
Number of | without | with | diff | diff |
processes | Marker [Sec] | Marker [Sec] | [Sec] | [%] |
--------------------------------------------------------------
50 | 2.094 | 2.072 | -0.022 | -1.07 |
100 | 4.162 | 4.273 | +0.111 | +2.66 |
150 | 6.485 | 6.540 | +0.055 | +0.84 |
200 | 8.556 | 8.478 | -0.078 | -0.91 |
250 | 10.458 | 10.258 | -0.200 | -1.91 |
300 | 12.425 | 12.750 | +0.325 | +2.62 |
350 | 14.807 | 14.839 | +0.032 | +0.22 |
400 | 16.801 | 16.959 | +0.158 | +0.94 |
450 | 19.478 | 19.009 | -0.470 | -2.41 |
500 | 21.296 | 21.504 | +0.208 | +0.98 |
550 | 23.842 | 23.979 | +0.137 | +0.57 |
600 | 26.309 | 26.111 | -0.198 | -0.75 |
650 | 28.705 | 28.446 | -0.259 | -0.9 |
700 | 31.233 | 31.394 | +0.161 | +0.52 |
750 | 34.064 | 33.720 | -0.344 | -1.01 |
800 | 36.320 | 36.114 | -0.206 | -0.57 |
--------------------------------------------------------------
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: 'Peter Zijlstra' <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-07-18 23:16:16 +07:00
|
|
|
/* !CONFIG_MODULES stub: does nothing. */
static inline void module_update_tracepoints(void)
{
}
|
|
|
|
|
|
|
|
/*
 * !CONFIG_MODULES stub: @iter is ignored and left untouched; the result is
 * always 0.
 */
static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
{
	return 0;
}
|
2007-02-14 06:19:06 +07:00
|
|
|
#endif /* CONFIG_MODULES */
|
|
|
|
|
|
|
|
/*
 * Module-related sysfs objects.  They are only declared here, and only when
 * CONFIG_SYSFS is set; their definitions live outside this header.
 */
#ifdef CONFIG_SYSFS
extern struct kset *module_kset;
extern struct kobj_type module_ktype;
extern int module_sysfs_initialized;
#endif /* CONFIG_SYSFS */
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
 * Wraps symbol_get(x) in try_then_request_module(), handing it the string
 * "symbol:" immediately followed by the stringified x.
 */
#define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x)
|
|
|
|
|
|
|
|
/* BELOW HERE ALL THESE ARE OBSOLETE AND WILL VANISH */
|
|
|
|
|
|
|
|
/* Listed among the obsolete interfaces: simply forwards to __stringify(). */
#define __MODULE_STRING(x) __stringify(x)
|
|
|
|
|
2010-11-17 04:35:16 +07:00
|
|
|
/*
 * With CONFIG_DEBUG_SET_MODULE_RONX the two helpers are defined outside this
 * header; otherwise they collapse to empty inline functions so callers need
 * no #ifdef of their own.
 */
#ifdef CONFIG_DEBUG_SET_MODULE_RONX
extern void set_all_modules_text_rw(void);
extern void set_all_modules_text_ro(void);
#else
static inline void set_all_modules_text_rw(void) { }
static inline void set_all_modules_text_ro(void) { }
#endif
|
2009-06-17 05:33:37 +07:00
|
|
|
|
|
|
|
#ifdef CONFIG_GENERIC_BUG
|
2010-10-06 01:29:27 +07:00
|
|
|
void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *,
|
2009-06-17 05:33:37 +07:00
|
|
|
struct module *);
|
|
|
|
void module_bug_cleanup(struct module *);
|
|
|
|
|
|
|
|
#else /* !CONFIG_GENERIC_BUG */
|
|
|
|
|
2010-10-06 01:29:27 +07:00
|
|
|
static inline void module_bug_finalize(const Elf_Ehdr *hdr,
|
2009-06-17 05:33:37 +07:00
|
|
|
const Elf_Shdr *sechdrs,
|
|
|
|
struct module *mod)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
static inline void module_bug_cleanup(struct module *mod) {}
|
|
|
|
#endif /* CONFIG_GENERIC_BUG */
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
#endif /* _LINUX_MODULE_H */
|