The exciting thing here is the getting rid of stop_machine on module

removal.  This is possible by using a simple atomic_t for the counter,
 rather than our fancy per-cpu counter: it turns out that no one is doing
 a module increment per net packet, so the slowdown should be in the noise.
 
 Also, script fixed for new git version.
 
 Cheers,
 Rusty.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJUk3cQAAoJENkgDmzRrbjxr44P/25ZBYmKZZ3XM3flt2o0LCti
 1Px+MRbWuXhueWQOYZSXOO3c2ENNuV3siaU4jQZqnxslpdvT4rVsVFkYuwva2vHT
 hqpoq1Hz++yjFJArjERFOdoZ1gxkBbZQQGYm8esToAqU3b2Z74SrU48dPwp65q/1
 r6hbXdWSiKALEBZeW2coi+QVCL/oxE8hmNqDO1mpe82aEKu0xIVpTdU5vAfBIj8/
 Z95U2bx+CjiP7khhSjBGtltLqxL6QXw1m2eg1gO9nf1gJNI0/dAY6IJmFbGz+7Bt
 CAyc9BRsB40Em8G7d7wr4FsURcLfmYNdjtx79j+Rot5PkVIi+Ztv7C1QYlMQESPa
 ESddUMySOmKlzTm50w3ZLvV1ZTRU8TjmttSkzQYZ3csCLkKUgfeL9SAxU9KGoA2l
 jFxrvDcWEHtuU1D/FeYyOofNaD/BflPfdhj4WAm9XnPPi+THEu7fulWJaIP4glHh
 8TpYNbinXuZqXO4nJ41Ad5utbSbBQa4fFBUuViWRTU0TtWJT2HVqn/XoYJ5mnPEz
 IbYh31rQDKFJKzePfscWrJ6XzoF59yGiAVcWcI3HS7aT8bFZGapAQu9mNCVu+cLF
 uRxWrukHG7d8YeYrAtbVXWfxArR155V9QJN55hQ1nKLq2M03gNvYTtAPw2yEsfuw
 u3Fk/KkV1RfaiFurjoG/
 =rDum
 -----END PGP SIGNATURE-----

Merge tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux

Pull module updates from Rusty Russell:
 "The exciting thing here is the getting rid of stop_machine on module
  removal.  This is possible by using a simple atomic_t for the counter,
  rather than our fancy per-cpu counter: it turns out that no one is
  doing a module increment per net packet, so the slowdown should be in
  the noise"

* tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux:
  param: do not set store func without write perm
  params: cleanup sysfs allocation
  kernel:module Fix coding style errors and warnings.
  module: Remove stop_machine from module unloading
  module: Replace module_ref with atomic_t refcnt
  lib/bug: Use RCU list ops for module_bug_list
  module: Unlink module with RCU synchronizing instead of stop_machine
  module: Wait for RCU synchronizing before releasing a module
This commit is contained in:
Linus Torvalds 2014-12-18 20:55:41 -08:00
commit d790be3863
5 changed files with 138 additions and 169 deletions

View File

@ -210,20 +210,6 @@ enum module_state {
MODULE_STATE_UNFORMED, /* Still setting it up. */
};
/**
* struct module_ref - per cpu module reference counts
* @incs: number of module get on this cpu
* @decs: number of module put on this cpu
*
* We force an alignment on 8 or 16 bytes, so that alloc_percpu()
* put @incs/@decs in same cache line, with no extra memory cost,
* since alloc_percpu() is fine grained.
*/
struct module_ref {
unsigned long incs;
unsigned long decs;
} __attribute((aligned(2 * sizeof(unsigned long))));
struct module {
enum module_state state;
@ -367,7 +353,7 @@ struct module {
/* Destruction function. */
void (*exit)(void);
struct module_ref __percpu *refptr;
atomic_t refcnt;
#endif
#ifdef CONFIG_CONSTRUCTORS

View File

@ -80,7 +80,7 @@ DECLARE_EVENT_CLASS(module_refcnt,
TP_fast_assign(
__entry->ip = ip;
__entry->refcnt = __this_cpu_read(mod->refptr->incs) - __this_cpu_read(mod->refptr->decs);
__entry->refcnt = atomic_read(&mod->refcnt);
__assign_str(name, mod->name);
),

View File

@ -42,7 +42,6 @@
#include <linux/vermagic.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/device.h>
#include <linux/string.h>
#include <linux/mutex.h>
@ -98,7 +97,7 @@
* 1) List of modules (also safely readable with preempt_disable),
* 2) module_use links,
* 3) module_addr_min/module_addr_max.
* (delete uses stop_machine/add uses RCU list operations). */
* (delete and add uses RCU list operations). */
DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
static LIST_HEAD(modules);
@ -158,13 +157,13 @@ static BLOCKING_NOTIFIER_HEAD(module_notify_list);
* Protected by module_mutex. */
static unsigned long module_addr_min = -1UL, module_addr_max = 0;
int register_module_notifier(struct notifier_block * nb)
int register_module_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&module_notify_list, nb);
}
EXPORT_SYMBOL(register_module_notifier);
int unregister_module_notifier(struct notifier_block * nb)
int unregister_module_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_unregister(&module_notify_list, nb);
}
@ -628,18 +627,23 @@ static char last_unloaded_module[MODULE_NAME_LEN+1];
EXPORT_TRACEPOINT_SYMBOL(module_get);
/* MODULE_REF_BASE is the base reference count by kmodule loader. */
#define MODULE_REF_BASE 1
/* Init the unload section of the module. */
static int module_unload_init(struct module *mod)
{
mod->refptr = alloc_percpu(struct module_ref);
if (!mod->refptr)
return -ENOMEM;
/*
* Initialize reference counter to MODULE_REF_BASE.
* refcnt == 0 means module is going.
*/
atomic_set(&mod->refcnt, MODULE_REF_BASE);
INIT_LIST_HEAD(&mod->source_list);
INIT_LIST_HEAD(&mod->target_list);
/* Hold reference count during initialization. */
raw_cpu_write(mod->refptr->incs, 1);
atomic_inc(&mod->refcnt);
return 0;
}
@ -721,8 +725,6 @@ static void module_unload_free(struct module *mod)
kfree(use);
}
mutex_unlock(&module_mutex);
free_percpu(mod->refptr);
}
#ifdef CONFIG_MODULE_FORCE_UNLOAD
@ -740,60 +742,39 @@ static inline int try_force_unload(unsigned int flags)
}
#endif /* CONFIG_MODULE_FORCE_UNLOAD */
struct stopref
/* Try to release refcount of module, 0 means success. */
static int try_release_module_ref(struct module *mod)
{
struct module *mod;
int flags;
int *forced;
};
int ret;
/* Whole machine is stopped with interrupts off when this runs. */
static int __try_stop_module(void *_sref)
{
struct stopref *sref = _sref;
/* Try to decrement refcnt which we set at loading */
ret = atomic_sub_return(MODULE_REF_BASE, &mod->refcnt);
BUG_ON(ret < 0);
if (ret)
/* Someone can put this right now, recover with checking */
ret = atomic_add_unless(&mod->refcnt, MODULE_REF_BASE, 0);
/* If it's not unused, quit unless we're forcing. */
if (module_refcount(sref->mod) != 0) {
if (!(*sref->forced = try_force_unload(sref->flags)))
return -EWOULDBLOCK;
}
/* Mark it as dying. */
sref->mod->state = MODULE_STATE_GOING;
return 0;
return ret;
}
static int try_stop_module(struct module *mod, int flags, int *forced)
{
struct stopref sref = { mod, flags, forced };
/* If it's not unused, quit unless we're forcing. */
if (try_release_module_ref(mod) != 0) {
*forced = try_force_unload(flags);
if (!(*forced))
return -EWOULDBLOCK;
}
return stop_machine(__try_stop_module, &sref, NULL);
/* Mark it as dying. */
mod->state = MODULE_STATE_GOING;
return 0;
}
unsigned long module_refcount(struct module *mod)
{
unsigned long incs = 0, decs = 0;
int cpu;
for_each_possible_cpu(cpu)
decs += per_cpu_ptr(mod->refptr, cpu)->decs;
/*
* ensure the incs are added up after the decs.
* module_put ensures incs are visible before decs with smp_wmb.
*
* This 2-count scheme avoids the situation where the refcount
* for CPU0 is read, then CPU0 increments the module refcount,
* then CPU1 drops that refcount, then the refcount for CPU1 is
* read. We would record a decrement but not its corresponding
* increment so we would see a low count (disaster).
*
* Rare situation? But module_refcount can be preempted, and we
* might be tallying up 4096+ CPUs. So it is not impossible.
*/
smp_rmb();
for_each_possible_cpu(cpu)
incs += per_cpu_ptr(mod->refptr, cpu)->incs;
return incs - decs;
return (unsigned long)atomic_read(&mod->refcnt) - MODULE_REF_BASE;
}
EXPORT_SYMBOL(module_refcount);
@ -877,8 +858,10 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
seq_printf(m, " %lu ", module_refcount(mod));
/* Always include a trailing , so userspace can differentiate
between this and the old multi-field proc format. */
/*
* Always include a trailing , so userspace can differentiate
* between this and the old multi-field proc format.
*/
list_for_each_entry(use, &mod->source_list, source_list) {
printed_something = 1;
seq_printf(m, "%s,", use->source->name);
@ -886,11 +869,11 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
if (mod->init != NULL && mod->exit == NULL) {
printed_something = 1;
seq_printf(m, "[permanent],");
seq_puts(m, "[permanent],");
}
if (!printed_something)
seq_printf(m, "-");
seq_puts(m, "-");
}
void __symbol_put(const char *symbol)
@ -935,7 +918,7 @@ void __module_get(struct module *module)
{
if (module) {
preempt_disable();
__this_cpu_inc(module->refptr->incs);
atomic_inc(&module->refcnt);
trace_module_get(module, _RET_IP_);
preempt_enable();
}
@ -948,11 +931,11 @@ bool try_module_get(struct module *module)
if (module) {
preempt_disable();
if (likely(module_is_live(module))) {
__this_cpu_inc(module->refptr->incs);
/* Note: here, we can fail to get a reference */
if (likely(module_is_live(module) &&
atomic_inc_not_zero(&module->refcnt) != 0))
trace_module_get(module, _RET_IP_);
} else
else
ret = false;
preempt_enable();
@ -963,11 +946,12 @@ EXPORT_SYMBOL(try_module_get);
void module_put(struct module *module)
{
int ret;
if (module) {
preempt_disable();
smp_wmb(); /* see comment in module_refcount */
__this_cpu_inc(module->refptr->decs);
ret = atomic_dec_if_positive(&module->refcnt);
WARN_ON(ret < 0); /* Failed to put refcount */
trace_module_put(module, _RET_IP_);
preempt_enable();
}
@ -978,7 +962,7 @@ EXPORT_SYMBOL(module_put);
static inline void print_unload_info(struct seq_file *m, struct module *mod)
{
/* We don't know the usage count, or what modules are using. */
seq_printf(m, " - -");
seq_puts(m, " - -");
}
static inline void module_unload_free(struct module *mod)
@ -1131,7 +1115,7 @@ static unsigned long maybe_relocated(unsigned long crc,
static int check_version(Elf_Shdr *sechdrs,
unsigned int versindex,
const char *symname,
struct module *mod,
struct module *mod,
const unsigned long *crc,
const struct module *crc_owner)
{
@ -1165,7 +1149,7 @@ static int check_version(Elf_Shdr *sechdrs,
return 0;
bad_version:
printk("%s: disagrees about version of symbol %s\n",
pr_warn("%s: disagrees about version of symbol %s\n",
mod->name, symname);
return 0;
}
@ -1200,7 +1184,7 @@ static inline int same_magic(const char *amagic, const char *bmagic,
static inline int check_version(Elf_Shdr *sechdrs,
unsigned int versindex,
const char *symname,
struct module *mod,
struct module *mod,
const unsigned long *crc,
const struct module *crc_owner)
{
@ -1288,15 +1272,13 @@ static inline bool sect_empty(const Elf_Shdr *sect)
return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
}
struct module_sect_attr
{
struct module_sect_attr {
struct module_attribute mattr;
char *name;
unsigned long address;
};
struct module_sect_attrs
{
struct module_sect_attrs {
struct attribute_group grp;
unsigned int nsections;
struct module_sect_attr attrs[0];
@ -1550,7 +1532,8 @@ static int module_add_modinfo_attrs(struct module *mod)
(attr->test && attr->test(mod))) {
memcpy(temp_attr, attr, sizeof(*temp_attr));
sysfs_attr_init(&temp_attr->attr);
error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr);
error = sysfs_create_file(&mod->mkobj.kobj,
&temp_attr->attr);
++temp_attr;
}
}
@ -1566,7 +1549,7 @@ static void module_remove_modinfo_attrs(struct module *mod)
/* pick a field to test for end of list */
if (!attr->attr.name)
break;
sysfs_remove_file(&mod->mkobj.kobj,&attr->attr);
sysfs_remove_file(&mod->mkobj.kobj, &attr->attr);
if (attr->free)
attr->free(mod);
}
@ -1697,18 +1680,6 @@ static void mod_sysfs_teardown(struct module *mod)
mod_sysfs_fini(mod);
}
/*
* unlink the module with the whole machine is stopped with interrupts off
* - this defends against kallsyms not taking locks
*/
static int __unlink_module(void *_mod)
{
struct module *mod = _mod;
list_del(&mod->list);
module_bug_cleanup(mod);
return 0;
}
#ifdef CONFIG_DEBUG_SET_MODULE_RONX
/*
* LKM RO/NX protection: protect module's text/ro-data
@ -1860,7 +1831,12 @@ static void free_module(struct module *mod)
/* Now we can delete it from the lists */
mutex_lock(&module_mutex);
stop_machine(__unlink_module, mod, NULL);
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
/* Remove this module from bug list, this uses list_del_rcu */
module_bug_cleanup(mod);
/* Wait for RCU synchronizing before releasing mod->list and buglist. */
synchronize_rcu();
mutex_unlock(&module_mutex);
/* This may be NULL, but that's OK */
@ -1955,7 +1931,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
/* We compiled with -fno-common. These are not
supposed to happen. */
pr_debug("Common symbol: %s\n", name);
printk("%s: please compile with -fno-common\n",
pr_warn("%s: please compile with -fno-common\n",
mod->name);
ret = -ENOEXEC;
break;
@ -2259,7 +2235,7 @@ static char elf_type(const Elf_Sym *sym, const struct load_info *info)
}
static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
unsigned int shnum)
unsigned int shnum)
{
const Elf_Shdr *sec;
@ -2735,7 +2711,7 @@ static int find_module_sections(struct module *mod, struct load_info *info)
* This shouldn't happen with same compiler and binutils
* building all parts of the module.
*/
printk(KERN_WARNING "%s: has both .ctors and .init_array.\n",
pr_warn("%s: has both .ctors and .init_array.\n",
mod->name);
return -EINVAL;
}
@ -3023,8 +2999,10 @@ static int do_init_module(struct module *mod)
if (mod->init != NULL)
ret = do_one_initcall(mod->init);
if (ret < 0) {
/* Init routine failed: abort. Try to protect us from
buggy refcounters. */
/*
* Init routine failed: abort. Try to protect us from
* buggy refcounters.
*/
mod->state = MODULE_STATE_GOING;
synchronize_sched();
module_put(mod);
@ -3202,7 +3180,7 @@ static int complete_formation(struct module *mod, struct load_info *info)
static int unknown_module_param_cb(char *param, char *val, const char *modname)
{
/* Check for magic 'dyndbg' arg */
/* Check for magic 'dyndbg' arg */
int ret = ddebug_dyndbg_module_param_cb(param, val, modname);
if (ret != 0)
pr_warn("%s: unknown parameter '%s' ignored\n", modname, param);
@ -3352,6 +3330,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
wake_up_all(&module_wq);
/* Wait for RCU synchronizing before releasing mod->list. */
synchronize_rcu();
mutex_unlock(&module_mutex);
free_module:
module_deallocate(mod, info);
@ -3685,8 +3665,8 @@ static int m_show(struct seq_file *m, void *p)
/* Informative for users. */
seq_printf(m, " %s",
mod->state == MODULE_STATE_GOING ? "Unloading":
mod->state == MODULE_STATE_COMING ? "Loading":
mod->state == MODULE_STATE_GOING ? "Unloading" :
mod->state == MODULE_STATE_COMING ? "Loading" :
"Live");
/* Used by oprofile and other similar tools. */
seq_printf(m, " 0x%pK", mod->module_core);
@ -3695,7 +3675,7 @@ static int m_show(struct seq_file *m, void *p)
if (mod->taints)
seq_printf(m, " %s", module_flags(mod, buf));
seq_printf(m, "\n");
seq_puts(m, "\n");
return 0;
}

View File

@ -603,74 +603,67 @@ static __modinit int add_sysfs_param(struct module_kobject *mk,
const struct kernel_param *kp,
const char *name)
{
struct module_param_attrs *new;
struct attribute **attrs;
int err, num;
struct module_param_attrs *new_mp;
struct attribute **new_attrs;
unsigned int i;
/* We don't bother calling this with invisible parameters. */
BUG_ON(!kp->perm);
if (!mk->mp) {
num = 0;
attrs = NULL;
} else {
num = mk->mp->num;
attrs = mk->mp->grp.attrs;
/* First allocation. */
mk->mp = kzalloc(sizeof(*mk->mp), GFP_KERNEL);
if (!mk->mp)
return -ENOMEM;
mk->mp->grp.name = "parameters";
/* NULL-terminated attribute array. */
mk->mp->grp.attrs = kzalloc(sizeof(mk->mp->grp.attrs[0]),
GFP_KERNEL);
/* Caller will cleanup via free_module_param_attrs */
if (!mk->mp->grp.attrs)
return -ENOMEM;
}
/* Enlarge. */
new = krealloc(mk->mp,
sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1),
GFP_KERNEL);
if (!new) {
kfree(attrs);
err = -ENOMEM;
goto fail;
}
/* Despite looking like the typical realloc() bug, this is safe.
* We *want* the old 'attrs' to be freed either way, and we'll store
* the new one in the success case. */
attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL);
if (!attrs) {
err = -ENOMEM;
goto fail_free_new;
}
/* Enlarge allocations. */
new_mp = krealloc(mk->mp,
sizeof(*mk->mp) +
sizeof(mk->mp->attrs[0]) * (mk->mp->num + 1),
GFP_KERNEL);
if (!new_mp)
return -ENOMEM;
mk->mp = new_mp;
/* Sysfs wants everything zeroed. */
memset(new, 0, sizeof(*new));
memset(&new->attrs[num], 0, sizeof(new->attrs[num]));
memset(&attrs[num], 0, sizeof(attrs[num]));
new->grp.name = "parameters";
new->grp.attrs = attrs;
/* Extra pointer for NULL terminator */
new_attrs = krealloc(mk->mp->grp.attrs,
sizeof(mk->mp->grp.attrs[0]) * (mk->mp->num + 2),
GFP_KERNEL);
if (!new_attrs)
return -ENOMEM;
mk->mp->grp.attrs = new_attrs;
/* Tack new one on the end. */
sysfs_attr_init(&new->attrs[num].mattr.attr);
new->attrs[num].param = kp;
new->attrs[num].mattr.show = param_attr_show;
new->attrs[num].mattr.store = param_attr_store;
new->attrs[num].mattr.attr.name = (char *)name;
new->attrs[num].mattr.attr.mode = kp->perm;
new->num = num+1;
sysfs_attr_init(&mk->mp->attrs[mk->mp->num].mattr.attr);
mk->mp->attrs[mk->mp->num].param = kp;
mk->mp->attrs[mk->mp->num].mattr.show = param_attr_show;
/* Do not allow runtime DAC changes to make param writable. */
if ((kp->perm & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0)
mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store;
mk->mp->attrs[mk->mp->num].mattr.attr.name = (char *)name;
mk->mp->attrs[mk->mp->num].mattr.attr.mode = kp->perm;
mk->mp->num++;
/* Fix up all the pointers, since krealloc can move us */
for (num = 0; num < new->num; num++)
new->grp.attrs[num] = &new->attrs[num].mattr.attr;
new->grp.attrs[num] = NULL;
mk->mp = new;
for (i = 0; i < mk->mp->num; i++)
mk->mp->grp.attrs[i] = &mk->mp->attrs[i].mattr.attr;
mk->mp->grp.attrs[mk->mp->num] = NULL;
return 0;
fail_free_new:
kfree(new);
fail:
mk->mp = NULL;
return err;
}
#ifdef CONFIG_MODULES
static void free_module_param_attrs(struct module_kobject *mk)
{
kfree(mk->mp->grp.attrs);
if (mk->mp)
kfree(mk->mp->grp.attrs);
kfree(mk->mp);
mk->mp = NULL;
}
@ -695,8 +688,10 @@ int module_param_sysfs_setup(struct module *mod,
if (kparam[i].perm == 0)
continue;
err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name);
if (err)
if (err) {
free_module_param_attrs(&mod->mkobj);
return err;
}
params = true;
}

View File

@ -64,16 +64,22 @@ static LIST_HEAD(module_bug_list);
static const struct bug_entry *module_find_bug(unsigned long bugaddr)
{
struct module *mod;
const struct bug_entry *bug = NULL;
list_for_each_entry(mod, &module_bug_list, bug_list) {
const struct bug_entry *bug = mod->bug_table;
rcu_read_lock();
list_for_each_entry_rcu(mod, &module_bug_list, bug_list) {
unsigned i;
bug = mod->bug_table;
for (i = 0; i < mod->num_bugs; ++i, ++bug)
if (bugaddr == bug_addr(bug))
return bug;
goto out;
}
return NULL;
bug = NULL;
out:
rcu_read_unlock();
return bug;
}
void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
@ -99,13 +105,15 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
* Strictly speaking this should have a spinlock to protect against
* traversals, but since we only traverse on BUG()s, a spinlock
* could potentially lead to deadlock and thus be counter-productive.
* Thus, this uses RCU to safely manipulate the bug list, since BUG
* must run in non-interruptive state.
*/
list_add(&mod->bug_list, &module_bug_list);
list_add_rcu(&mod->bug_list, &module_bug_list);
}
void module_bug_cleanup(struct module *mod)
{
list_del(&mod->bug_list);
list_del_rcu(&mod->bug_list);
}
#else