Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
 "The biggest changes are fixes for races that kept triggering Trinity
  crashes, plus liblockdep build fixes and smaller misc fixes.

  The liblockdep bits in perf/urgent are a pull mistake - they should
  have been in locking/urgent - but by the time I noticed other commits
  were added and testing was done :-/ Sorry about that"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf: Fix a race between ring_buffer_detach() and ring_buffer_attach()
  perf: Prevent false warning in perf_swevent_add
  perf: Limit perf_event_attr::sample_period to 63 bits
  tools/liblockdep: Remove all build files when doing make clean
  tools/liblockdep: Build liblockdep from tools/Makefile
  perf/x86/intel: Fix Silvermont's event constraints
  perf: Fix perf_event_init_context()
  perf: Fix race in removing an event
This commit is contained in:
Linus Torvalds 2014-05-23 10:02:34 -07:00
commit e6a32c3ad1
5 changed files with 102 additions and 86 deletions

View File

@ -169,7 +169,6 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
EVENT_CONSTRAINT_END
};

View File

@ -402,6 +402,8 @@ struct perf_event {
struct ring_buffer *rb;
struct list_head rb_entry;
unsigned long rcu_batches;
int rcu_pending;
/* poll related */
wait_queue_head_t waitq;

View File

@ -1443,6 +1443,11 @@ group_sched_out(struct perf_event *group_event,
cpuctx->exclusive = 0;
}
struct remove_event {
struct perf_event *event;
bool detach_group;
};
/*
* Cross CPU call to remove a performance event
*
@ -1451,12 +1456,15 @@ group_sched_out(struct perf_event *group_event,
*/
static int __perf_remove_from_context(void *info)
{
struct perf_event *event = info;
struct remove_event *re = info;
struct perf_event *event = re->event;
struct perf_event_context *ctx = event->ctx;
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
raw_spin_lock(&ctx->lock);
event_sched_out(event, cpuctx, ctx);
if (re->detach_group)
perf_group_detach(event);
list_del_event(event, ctx);
if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
ctx->is_active = 0;
@ -1481,10 +1489,14 @@ static int __perf_remove_from_context(void *info)
* When called from perf_event_exit_task, it's OK because the
* context has been detached from its task.
*/
static void perf_remove_from_context(struct perf_event *event)
static void perf_remove_from_context(struct perf_event *event, bool detach_group)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task;
struct remove_event re = {
.event = event,
.detach_group = detach_group,
};
lockdep_assert_held(&ctx->mutex);
@ -1493,12 +1505,12 @@ static void perf_remove_from_context(struct perf_event *event)
* Per cpu events are removed via an smp call and
* the removal is always successful.
*/
cpu_function_call(event->cpu, __perf_remove_from_context, event);
cpu_function_call(event->cpu, __perf_remove_from_context, &re);
return;
}
retry:
if (!task_function_call(task, __perf_remove_from_context, event))
if (!task_function_call(task, __perf_remove_from_context, &re))
return;
raw_spin_lock_irq(&ctx->lock);
@ -1515,6 +1527,8 @@ static void perf_remove_from_context(struct perf_event *event)
* Since the task isn't running, its safe to remove the event, us
* holding the ctx->lock ensures the task won't get scheduled in.
*/
if (detach_group)
perf_group_detach(event);
list_del_event(event, ctx);
raw_spin_unlock_irq(&ctx->lock);
}
@ -3178,7 +3192,8 @@ static void free_event_rcu(struct rcu_head *head)
}
static void ring_buffer_put(struct ring_buffer *rb);
static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
static void ring_buffer_attach(struct perf_event *event,
struct ring_buffer *rb);
static void unaccount_event_cpu(struct perf_event *event, int cpu)
{
@ -3238,8 +3253,6 @@ static void free_event(struct perf_event *event)
unaccount_event(event);
if (event->rb) {
struct ring_buffer *rb;
/*
* Can happen when we close an event with re-directed output.
*
@ -3247,12 +3260,7 @@ static void free_event(struct perf_event *event)
* over us; possibly making our ring_buffer_put() the last.
*/
mutex_lock(&event->mmap_mutex);
rb = event->rb;
if (rb) {
rcu_assign_pointer(event->rb, NULL);
ring_buffer_detach(event, rb);
ring_buffer_put(rb); /* could be last */
}
ring_buffer_attach(event, NULL);
mutex_unlock(&event->mmap_mutex);
}
@ -3281,10 +3289,7 @@ int perf_event_release_kernel(struct perf_event *event)
* to trigger the AB-BA case.
*/
mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
raw_spin_lock_irq(&ctx->lock);
perf_group_detach(event);
raw_spin_unlock_irq(&ctx->lock);
perf_remove_from_context(event);
perf_remove_from_context(event, true);
mutex_unlock(&ctx->mutex);
free_event(event);
@ -3839,28 +3844,47 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
static void ring_buffer_attach(struct perf_event *event,
struct ring_buffer *rb)
{
struct ring_buffer *old_rb = NULL;
unsigned long flags;
if (!list_empty(&event->rb_entry))
return;
if (event->rb) {
/*
* Should be impossible, we set this when removing
* event->rb_entry and wait/clear when adding event->rb_entry.
*/
WARN_ON_ONCE(event->rcu_pending);
spin_lock_irqsave(&rb->event_lock, flags);
if (list_empty(&event->rb_entry))
list_add(&event->rb_entry, &rb->event_list);
spin_unlock_irqrestore(&rb->event_lock, flags);
}
old_rb = event->rb;
event->rcu_batches = get_state_synchronize_rcu();
event->rcu_pending = 1;
static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
{
unsigned long flags;
spin_lock_irqsave(&old_rb->event_lock, flags);
list_del_rcu(&event->rb_entry);
spin_unlock_irqrestore(&old_rb->event_lock, flags);
}
if (list_empty(&event->rb_entry))
return;
if (event->rcu_pending && rb) {
cond_synchronize_rcu(event->rcu_batches);
event->rcu_pending = 0;
}
spin_lock_irqsave(&rb->event_lock, flags);
list_del_init(&event->rb_entry);
wake_up_all(&event->waitq);
spin_unlock_irqrestore(&rb->event_lock, flags);
if (rb) {
spin_lock_irqsave(&rb->event_lock, flags);
list_add_rcu(&event->rb_entry, &rb->event_list);
spin_unlock_irqrestore(&rb->event_lock, flags);
}
rcu_assign_pointer(event->rb, rb);
if (old_rb) {
ring_buffer_put(old_rb);
/*
* Since we detached before setting the new rb, so that we
* could attach the new rb, we could have missed a wakeup.
* Provide it now.
*/
wake_up_all(&event->waitq);
}
}
static void ring_buffer_wakeup(struct perf_event *event)
@ -3929,7 +3953,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;
struct ring_buffer *rb = event->rb;
struct ring_buffer *rb = ring_buffer_get(event);
struct user_struct *mmap_user = rb->mmap_user;
int mmap_locked = rb->mmap_locked;
unsigned long size = perf_data_size(rb);
@ -3937,18 +3961,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
atomic_dec(&rb->mmap_count);
if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
return;
goto out_put;
/* Detach current event from the buffer. */
rcu_assign_pointer(event->rb, NULL);
ring_buffer_detach(event, rb);
ring_buffer_attach(event, NULL);
mutex_unlock(&event->mmap_mutex);
/* If there's still other mmap()s of this buffer, we're done. */
if (atomic_read(&rb->mmap_count)) {
ring_buffer_put(rb); /* can't be last */
return;
}
if (atomic_read(&rb->mmap_count))
goto out_put;
/*
* No other mmap()s, detach from all other events that might redirect
@ -3978,11 +3998,9 @@ static void perf_mmap_close(struct vm_area_struct *vma)
* still restart the iteration to make sure we're not now
* iterating the wrong list.
*/
if (event->rb == rb) {
rcu_assign_pointer(event->rb, NULL);
ring_buffer_detach(event, rb);
ring_buffer_put(rb); /* can't be last, we still have one */
}
if (event->rb == rb)
ring_buffer_attach(event, NULL);
mutex_unlock(&event->mmap_mutex);
put_event(event);
@ -4007,6 +4025,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
vma->vm_mm->pinned_vm -= mmap_locked;
free_uid(mmap_user);
out_put:
ring_buffer_put(rb); /* could be last */
}
@ -4124,7 +4143,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_mm->pinned_vm += extra;
ring_buffer_attach(event, rb);
rcu_assign_pointer(event->rb, rb);
perf_event_init_userpage(event);
perf_event_update_userpage(event);
@ -5408,6 +5426,9 @@ struct swevent_htable {
/* Recursion avoidance in each contexts */
int recursion[PERF_NR_CONTEXTS];
/* Keeps track of cpu being initialized/exited */
bool online;
};
static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
@ -5654,8 +5675,14 @@ static int perf_swevent_add(struct perf_event *event, int flags)
hwc->state = !(flags & PERF_EF_START);
head = find_swevent_head(swhash, event);
if (WARN_ON_ONCE(!head))
if (!head) {
/*
* We can race with cpu hotplug code. Do not
* WARN if the cpu just got unplugged.
*/
WARN_ON_ONCE(swhash->online);
return -EINVAL;
}
hlist_add_head_rcu(&event->hlist_entry, head);
@ -6914,7 +6941,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
static int
perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
{
struct ring_buffer *rb = NULL, *old_rb = NULL;
struct ring_buffer *rb = NULL;
int ret = -EINVAL;
if (!output_event)
@ -6942,8 +6969,6 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
if (atomic_read(&event->mmap_count))
goto unlock;
old_rb = event->rb;
if (output_event) {
/* get the rb we want to redirect to */
rb = ring_buffer_get(output_event);
@ -6951,23 +6976,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
goto unlock;
}
if (old_rb)
ring_buffer_detach(event, old_rb);
if (rb)
ring_buffer_attach(event, rb);
rcu_assign_pointer(event->rb, rb);
if (old_rb) {
ring_buffer_put(old_rb);
/*
* Since we detached before setting the new rb, so that we
* could attach the new rb, we could have missed a wakeup.
* Provide it now.
*/
wake_up_all(&event->waitq);
}
ring_buffer_attach(event, rb);
ret = 0;
unlock:
@ -7018,6 +7027,9 @@ SYSCALL_DEFINE5(perf_event_open,
if (attr.freq) {
if (attr.sample_freq > sysctl_perf_event_sample_rate)
return -EINVAL;
} else {
if (attr.sample_period & (1ULL << 63))
return -EINVAL;
}
/*
@ -7165,7 +7177,7 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event_context *gctx = group_leader->ctx;
mutex_lock(&gctx->mutex);
perf_remove_from_context(group_leader);
perf_remove_from_context(group_leader, false);
/*
* Removing from the context ends up with disabled
@ -7175,7 +7187,7 @@ SYSCALL_DEFINE5(perf_event_open,
perf_event__state_init(group_leader);
list_for_each_entry(sibling, &group_leader->sibling_list,
group_entry) {
perf_remove_from_context(sibling);
perf_remove_from_context(sibling, false);
perf_event__state_init(sibling);
put_ctx(gctx);
}
@ -7305,7 +7317,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
mutex_lock(&src_ctx->mutex);
list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
event_entry) {
perf_remove_from_context(event);
perf_remove_from_context(event, false);
unaccount_event_cpu(event, src_cpu);
put_ctx(src_ctx);
list_add(&event->migrate_entry, &events);
@ -7367,13 +7379,7 @@ __perf_event_exit_task(struct perf_event *child_event,
struct perf_event_context *child_ctx,
struct task_struct *child)
{
if (child_event->parent) {
raw_spin_lock_irq(&child_ctx->lock);
perf_group_detach(child_event);
raw_spin_unlock_irq(&child_ctx->lock);
}
perf_remove_from_context(child_event);
perf_remove_from_context(child_event, !!child_event->parent);
/*
* It can happen that the parent exits first, and has events
@ -7724,6 +7730,8 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
* swapped under us.
*/
parent_ctx = perf_pin_task_context(parent, ctxn);
if (!parent_ctx)
return 0;
/*
* No need to check if parent_ctx != NULL here; since we saw
@ -7835,6 +7843,7 @@ static void perf_event_init_cpu(int cpu)
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
mutex_lock(&swhash->hlist_mutex);
swhash->online = true;
if (swhash->hlist_refcount > 0) {
struct swevent_hlist *hlist;
@ -7857,14 +7866,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
static void __perf_event_exit_context(void *__info)
{
struct remove_event re = { .detach_group = false };
struct perf_event_context *ctx = __info;
struct perf_event *event;
perf_pmu_rotate_stop(ctx->pmu);
rcu_read_lock();
list_for_each_entry_rcu(event, &ctx->event_list, event_entry)
__perf_remove_from_context(event);
list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
__perf_remove_from_context(&re);
rcu_read_unlock();
}
@ -7892,6 +7901,7 @@ static void perf_event_exit_cpu(int cpu)
perf_event_exit_cpu_context(cpu);
mutex_lock(&swhash->hlist_mutex);
swhash->online = false;
swevent_hlist_release(swhash);
mutex_unlock(&swhash->hlist_mutex);
}

View File

@ -44,6 +44,9 @@ cpupower: FORCE
cgroup firewire hv guest usb virtio vm net: FORCE
$(call descend,$@)
liblockdep: FORCE
$(call descend,lib/lockdep)
libapikfs: FORCE
$(call descend,lib/api)
@ -91,6 +94,9 @@ cpupower_clean:
cgroup_clean hv_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean net_clean:
$(call descend,$(@:_clean=),clean)
liblockdep_clean:
$(call descend,lib/lockdep,clean)
libapikfs_clean:
$(call descend,lib/api,clean)

View File

@ -1,8 +1,7 @@
# file format version
FILE_VERSION = 1
MAKEFLAGS += --no-print-directory
LIBLOCKDEP_VERSION=$(shell make -sC ../../.. kernelversion)
LIBLOCKDEP_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion)
# Makefiles suck: This macro sets a default value of $(2) for the
# variable named by $(1), unless the variable has been set by
@ -231,7 +230,7 @@ install_lib: all_cmd
install: install_lib
clean:
$(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d
$(RM) *.o *~ $(TARGETS) *.a *liblockdep*.so* $(VERSION_FILES) .*.d
$(RM) tags TAGS
endif # skip-makefile