Diffstat (limited to 'kernel/events/core.c')
 -rw-r--r--  kernel/events/core.c  320
 1 file changed, 198 insertions(+), 122 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 78ae7b6f90fd..9efd0d7775e7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -375,6 +375,7 @@ enum event_type_t {
EVENT_TIME = 0x4,
/* see ctx_resched() for details */
EVENT_CPU = 0x8,
+ EVENT_CGROUP = 0x10,
EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
};
@@ -449,8 +450,8 @@ static void update_perf_cpu_limits(void)
static bool perf_rotate_context(struct perf_cpu_pmu_context *cpc);
-int perf_proc_update_handler(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+int perf_event_max_sample_rate_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
int perf_cpu = sysctl_perf_cpu_time_max_percent;
@@ -684,20 +685,26 @@ do { \
___p; \
})
-static void perf_ctx_disable(struct perf_event_context *ctx)
+static void perf_ctx_disable(struct perf_event_context *ctx, bool cgroup)
{
struct perf_event_pmu_context *pmu_ctx;
- list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry)
+ list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
+ if (cgroup && !pmu_ctx->nr_cgroups)
+ continue;
perf_pmu_disable(pmu_ctx->pmu);
+ }
}
-static void perf_ctx_enable(struct perf_event_context *ctx)
+static void perf_ctx_enable(struct perf_event_context *ctx, bool cgroup)
{
struct perf_event_pmu_context *pmu_ctx;
- list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry)
+ list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
+ if (cgroup && !pmu_ctx->nr_cgroups)
+ continue;
perf_pmu_enable(pmu_ctx->pmu);
+ }
}
static void ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type);
@@ -856,9 +863,9 @@ static void perf_cgroup_switch(struct task_struct *task)
return;
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
- perf_ctx_disable(&cpuctx->ctx);
+ perf_ctx_disable(&cpuctx->ctx, true);
- ctx_sched_out(&cpuctx->ctx, EVENT_ALL);
+ ctx_sched_out(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP);
/*
* must not be done before ctxswout due
* to update_cgrp_time_from_cpuctx() in
@@ -870,9 +877,9 @@ static void perf_cgroup_switch(struct task_struct *task)
* perf_cgroup_set_timestamp() in ctx_sched_in()
* to not have to pass task around
*/
- ctx_sched_in(&cpuctx->ctx, EVENT_ALL);
+ ctx_sched_in(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP);
- perf_ctx_enable(&cpuctx->ctx);
+ perf_ctx_enable(&cpuctx->ctx, true);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
@@ -965,6 +972,8 @@ perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ct
if (!is_cgroup_event(event))
return;
+ event->pmu_ctx->nr_cgroups++;
+
/*
* Because cgroup events are always per-cpu events,
* @ctx == &cpuctx->ctx.
@@ -985,6 +994,8 @@ perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *c
if (!is_cgroup_event(event))
return;
+ event->pmu_ctx->nr_cgroups--;
+
/*
* Because cgroup events are always per-cpu events,
* @ctx == &cpuctx->ctx.
@@ -1803,31 +1814,34 @@ static inline void perf_event__state_init(struct perf_event *event)
PERF_EVENT_STATE_INACTIVE;
}
-static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
+static int __perf_event_read_size(u64 read_format, int nr_siblings)
{
int entry = sizeof(u64); /* value */
int size = 0;
int nr = 1;
- if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
size += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
size += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_ID)
+ if (read_format & PERF_FORMAT_ID)
entry += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_LOST)
+ if (read_format & PERF_FORMAT_LOST)
entry += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_GROUP) {
+ if (read_format & PERF_FORMAT_GROUP) {
nr += nr_siblings;
size += sizeof(u64);
}
- size += entry * nr;
- event->read_size = size;
+ /*
+ * Since perf_event_validate_size() limits this to 16k and inhibits
+ * adding more siblings, this will never overflow.
+ */
+ return size + nr * entry;
}
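For reference, the read_size arithmetic above can be reproduced in a small stand-alone user-space sketch. The helper below is hypothetical and simply mirrors __perf_event_read_size(); it assumes the PERF_FORMAT_* flags from <linux/perf_event.h> (PERF_FORMAT_LOST needs reasonably recent headers):

/*
 * Stand-alone illustration of the read_size calculation: fixed fields
 * (times, nr) plus one entry per group member, where each entry is the
 * value optionally followed by id and lost counts.
 */
#include <stdio.h>
#include <stdint.h>
#include <linux/perf_event.h>

static uint64_t read_size(uint64_t read_format, int nr_siblings)
{
	uint64_t entry = sizeof(uint64_t);	/* value */
	uint64_t size = 0;
	uint64_t nr = 1;

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		size += sizeof(uint64_t);
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		size += sizeof(uint64_t);
	if (read_format & PERF_FORMAT_ID)
		entry += sizeof(uint64_t);
	if (read_format & PERF_FORMAT_LOST)
		entry += sizeof(uint64_t);
	if (read_format & PERF_FORMAT_GROUP) {
		nr += nr_siblings;
		size += sizeof(uint64_t);	/* the nr field itself */
	}
	return size + nr * entry;
}

int main(void)
{
	/* e.g. leader + 3 siblings, with ids and time_enabled:
	 * 8 (time_enabled) + 8 (nr) + 4 * 16 = 80 bytes */
	uint64_t fmt = PERF_FORMAT_GROUP | PERF_FORMAT_ID |
		       PERF_FORMAT_TOTAL_TIME_ENABLED;
	printf("read_size = %llu bytes\n",
	       (unsigned long long)read_size(fmt, 3));
	return 0;
}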
static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
@@ -1877,8 +1891,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
*/
static void perf_event__header_size(struct perf_event *event)
{
- __perf_event_read_size(event,
- event->group_leader->nr_siblings);
+ event->read_size =
+ __perf_event_read_size(event->attr.read_format,
+ event->group_leader->nr_siblings);
__perf_event_header_size(event, event->attr.sample_type);
}
@@ -1909,23 +1924,44 @@ static void perf_event__id_header_size(struct perf_event *event)
event->id_header_size = size;
}
+/*
+ * Check that adding an event to the group does not result in anybody
+ * overflowing the 64k event limit imposed by the output buffer.
+ *
+ * Specifically, check that the read_size for the event does not exceed 16k,
+ * read_size being the one term that grows with groups size. Since read_size
+ * depends on per-event read_format, also (re)check the existing events.
+ *
+ * This leaves 48k for the constant size fields and things like callchains,
+ * branch stacks and register sets.
+ */
static bool perf_event_validate_size(struct perf_event *event)
{
- /*
- * The values computed here will be over-written when we actually
- * attach the event.
- */
- __perf_event_read_size(event, event->group_leader->nr_siblings + 1);
- __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
- perf_event__id_header_size(event);
+ struct perf_event *sibling, *group_leader = event->group_leader;
+
+ if (__perf_event_read_size(event->attr.read_format,
+ group_leader->nr_siblings + 1) > 16*1024)
+ return false;
+
+ if (__perf_event_read_size(group_leader->attr.read_format,
+ group_leader->nr_siblings + 1) > 16*1024)
+ return false;
/*
- * Sum the lot; should not exceed the 64k limit we have on records.
- * Conservative limit to allow for callchains and other variable fields.
+ * When creating a new group leader, group_leader->ctx is initialized
+ * after the size has been validated, but we cannot safely use
+ * for_each_sibling_event() until group_leader->ctx is set. A new group
+ * leader cannot have any siblings yet, so we can safely skip checking
+ * the non-existent siblings.
*/
- if (event->read_size + event->header_size +
- event->id_header_size + sizeof(struct perf_event_header) >= 16*1024)
- return false;
+ if (event == group_leader)
+ return true;
+
+ for_each_sibling_event(sibling, group_leader) {
+ if (__perf_event_read_size(sibling->attr.read_format,
+ group_leader->nr_siblings + 1) > 16*1024)
+ return false;
+ }
return true;
}
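To give a feel for the 16k read_size cap: with the largest per-entry format (value + id + lost) and both time fields, each group member costs 24 bytes on top of 24 bytes of fixed fields, which bounds a group at roughly 681 members. A rough, illustrative calculation follows; the kernel never computes this number explicitly:

/*
 * Back-of-the-envelope check of the 16k cap enforced by
 * perf_event_validate_size(): worst-case entry and fixed-field sizes.
 */
#include <stdio.h>

int main(void)
{
	const int limit = 16 * 1024;
	const int fixed = 3 * 8;	/* nr + time_enabled + time_running */
	const int entry = 3 * 8;	/* value + id + lost */

	/* largest group that still passes the read_size check */
	printf("max group members: %d\n", (limit - fixed) / entry); /* 681 */
	return 0;
}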
@@ -1954,6 +1990,7 @@ static void perf_group_attach(struct perf_event *event)
list_add_tail(&event->sibling_list, &group_leader->sibling_list);
group_leader->nr_siblings++;
+ group_leader->group_generation++;
perf_event__header_size(group_leader);
@@ -2144,6 +2181,7 @@ static void perf_group_detach(struct perf_event *event)
if (leader != event) {
list_del_init(&event->sibling_list);
event->group_leader->nr_siblings--;
+ event->group_leader->group_generation++;
goto out;
}
@@ -2677,9 +2715,9 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
event_type &= EVENT_ALL;
- perf_ctx_disable(&cpuctx->ctx);
+ perf_ctx_disable(&cpuctx->ctx, false);
if (task_ctx) {
- perf_ctx_disable(task_ctx);
+ perf_ctx_disable(task_ctx, false);
task_ctx_sched_out(task_ctx, event_type);
}
@@ -2697,9 +2735,9 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
perf_event_sched_in(cpuctx, task_ctx);
- perf_ctx_enable(&cpuctx->ctx);
+ perf_ctx_enable(&cpuctx->ctx, false);
if (task_ctx)
- perf_ctx_enable(task_ctx);
+ perf_ctx_enable(task_ctx, false);
}
void perf_pmu_resched(struct pmu *pmu)
@@ -3244,6 +3282,9 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)
struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
struct perf_event_pmu_context *pmu_ctx;
int is_active = ctx->is_active;
+ bool cgroup = event_type & EVENT_CGROUP;
+
+ event_type &= ~EVENT_CGROUP;
lockdep_assert_held(&ctx->lock);
@@ -3290,8 +3331,11 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)
is_active ^= ctx->is_active; /* changed bits */
- list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry)
+ list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
+ if (cgroup && !pmu_ctx->nr_cgroups)
+ continue;
__pmu_ctx_sched_out(pmu_ctx, is_active);
+ }
}
/*
@@ -3482,7 +3526,7 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next)
raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
if (context_equiv(ctx, next_ctx)) {
- perf_ctx_disable(ctx);
+ perf_ctx_disable(ctx, false);
/* PMIs are disabled; ctx->nr_pending is stable. */
if (local_read(&ctx->nr_pending) ||
@@ -3502,7 +3546,7 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next)
perf_ctx_sched_task_cb(ctx, false);
perf_event_swap_task_ctx_data(ctx, next_ctx);
- perf_ctx_enable(ctx);
+ perf_ctx_enable(ctx, false);
/*
* RCU_INIT_POINTER here is safe because we've not
@@ -3526,13 +3570,13 @@ unlock:
if (do_switch) {
raw_spin_lock(&ctx->lock);
- perf_ctx_disable(ctx);
+ perf_ctx_disable(ctx, false);
inside_switch:
perf_ctx_sched_task_cb(ctx, false);
task_ctx_sched_out(ctx, EVENT_ALL);
- perf_ctx_enable(ctx);
+ perf_ctx_enable(ctx, false);
raw_spin_unlock(&ctx->lock);
}
}
@@ -3818,47 +3862,32 @@ static int merge_sched_in(struct perf_event *event, void *data)
return 0;
}
-static void ctx_pinned_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
+static void pmu_groups_sched_in(struct perf_event_context *ctx,
+ struct perf_event_groups *groups,
+ struct pmu *pmu)
{
- struct perf_event_pmu_context *pmu_ctx;
int can_add_hw = 1;
-
- if (pmu) {
- visit_groups_merge(ctx, &ctx->pinned_groups,
- smp_processor_id(), pmu,
- merge_sched_in, &can_add_hw);
- } else {
- list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
- can_add_hw = 1;
- visit_groups_merge(ctx, &ctx->pinned_groups,
- smp_processor_id(), pmu_ctx->pmu,
- merge_sched_in, &can_add_hw);
- }
- }
+ visit_groups_merge(ctx, groups, smp_processor_id(), pmu,
+ merge_sched_in, &can_add_hw);
}
-static void ctx_flexible_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
+static void ctx_groups_sched_in(struct perf_event_context *ctx,
+ struct perf_event_groups *groups,
+ bool cgroup)
{
struct perf_event_pmu_context *pmu_ctx;
- int can_add_hw = 1;
- if (pmu) {
- visit_groups_merge(ctx, &ctx->flexible_groups,
- smp_processor_id(), pmu,
- merge_sched_in, &can_add_hw);
- } else {
- list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
- can_add_hw = 1;
- visit_groups_merge(ctx, &ctx->flexible_groups,
- smp_processor_id(), pmu_ctx->pmu,
- merge_sched_in, &can_add_hw);
- }
+ list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
+ if (cgroup && !pmu_ctx->nr_cgroups)
+ continue;
+ pmu_groups_sched_in(ctx, groups, pmu_ctx->pmu);
}
}
-static void __pmu_ctx_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
+static void __pmu_ctx_sched_in(struct perf_event_context *ctx,
+ struct pmu *pmu)
{
- ctx_flexible_sched_in(ctx, pmu);
+ pmu_groups_sched_in(ctx, &ctx->flexible_groups, pmu);
}
static void
@@ -3866,6 +3895,9 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
int is_active = ctx->is_active;
+ bool cgroup = event_type & EVENT_CGROUP;
+
+ event_type &= ~EVENT_CGROUP;
lockdep_assert_held(&ctx->lock);
@@ -3898,11 +3930,11 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
* in order to give them the best chance of going on.
*/
if (is_active & EVENT_PINNED)
- ctx_pinned_sched_in(ctx, NULL);
+ ctx_groups_sched_in(ctx, &ctx->pinned_groups, cgroup);
/* Then walk through the lower prio flexible groups */
if (is_active & EVENT_FLEXIBLE)
- ctx_flexible_sched_in(ctx, NULL);
+ ctx_groups_sched_in(ctx, &ctx->flexible_groups, cgroup);
}
static void perf_event_context_sched_in(struct task_struct *task)
@@ -3917,11 +3949,11 @@ static void perf_event_context_sched_in(struct task_struct *task)
if (cpuctx->task_ctx == ctx) {
perf_ctx_lock(cpuctx, ctx);
- perf_ctx_disable(ctx);
+ perf_ctx_disable(ctx, false);
perf_ctx_sched_task_cb(ctx, true);
- perf_ctx_enable(ctx);
+ perf_ctx_enable(ctx, false);
perf_ctx_unlock(cpuctx, ctx);
goto rcu_unlock;
}
@@ -3934,7 +3966,7 @@ static void perf_event_context_sched_in(struct task_struct *task)
if (!ctx->nr_events)
goto unlock;
- perf_ctx_disable(ctx);
+ perf_ctx_disable(ctx, false);
/*
* We want to keep the following priority order:
* cpu pinned (that don't need to move), task pinned,
@@ -3944,7 +3976,7 @@ static void perf_event_context_sched_in(struct task_struct *task)
* events, no need to flip the cpuctx's events around.
*/
if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) {
- perf_ctx_disable(&cpuctx->ctx);
+ perf_ctx_disable(&cpuctx->ctx, false);
ctx_sched_out(&cpuctx->ctx, EVENT_FLEXIBLE);
}
@@ -3953,9 +3985,9 @@ static void perf_event_context_sched_in(struct task_struct *task)
perf_ctx_sched_task_cb(cpuctx->task_ctx, true);
if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
- perf_ctx_enable(&cpuctx->ctx);
+ perf_ctx_enable(&cpuctx->ctx, false);
- perf_ctx_enable(ctx);
+ perf_ctx_enable(ctx, false);
unlock:
perf_ctx_unlock(cpuctx, ctx);
@@ -4425,6 +4457,9 @@ static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
{
u16 local_pkg, event_pkg;
+ if ((unsigned)event_cpu >= nr_cpu_ids)
+ return event_cpu;
+
if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
int local_cpu = smp_processor_id();
@@ -4527,6 +4562,8 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
u64 *enabled, u64 *running)
{
unsigned long flags;
+ int event_oncpu;
+ int event_cpu;
int ret = 0;
/*
@@ -4551,15 +4588,22 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
goto out;
}
+ /*
+ * Get the event CPU numbers, and adjust them to local if the event is
+ * a per-package event that can be read locally
+ */
+ event_oncpu = __perf_event_read_cpu(event, event->oncpu);
+ event_cpu = __perf_event_read_cpu(event, event->cpu);
+
/* If this is a per-CPU event, it must be for this CPU */
if (!(event->attach_state & PERF_ATTACH_TASK) &&
- event->cpu != smp_processor_id()) {
+ event_cpu != smp_processor_id()) {
ret = -EINVAL;
goto out;
}
/* If this is a pinned event it must be running on this CPU */
- if (event->attr.pinned && event->oncpu != smp_processor_id()) {
+ if (event->attr.pinned && event_oncpu != smp_processor_id()) {
ret = -EBUSY;
goto out;
}
@@ -4569,7 +4613,7 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
* or local to this CPU. Furthermore it means its ACTIVE (otherwise
* oncpu == -1).
*/
- if (event->oncpu == smp_processor_id())
+ if (event_oncpu == smp_processor_id())
event->pmu->read(event);
*value = local64_read(&event->count);
@@ -4809,6 +4853,11 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
void *task_ctx_data = NULL;
if (!ctx->task) {
+ /*
+ * perf_pmu_migrate_context() / __perf_pmu_install_event()
+ * relies on the fact that find_get_pmu_context() cannot fail
+ * for CPU contexts.
+ */
struct perf_cpu_pmu_context *cpc;
cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
@@ -5440,7 +5489,7 @@ static int __perf_read_group_add(struct perf_event *leader,
u64 read_format, u64 *values)
{
struct perf_event_context *ctx = leader->ctx;
- struct perf_event *sub;
+ struct perf_event *sub, *parent;
unsigned long flags;
int n = 1; /* skip @nr */
int ret;
@@ -5450,6 +5499,33 @@ static int __perf_read_group_add(struct perf_event *leader,
return ret;
raw_spin_lock_irqsave(&ctx->lock, flags);
+ /*
+ * Verify the grouping between the parent and child (inherited)
+	 * events is still intact.
+ *
+ * Specifically:
+ * - leader->ctx->lock pins leader->sibling_list
+ * - parent->child_mutex pins parent->child_list
+ * - parent->ctx->mutex pins parent->sibling_list
+ *
+ * Because parent->ctx != leader->ctx (and child_list nests inside
+ * ctx->mutex), group destruction is not atomic between children, also
+ * see perf_event_release_kernel(). Additionally, parent can grow the
+ * group.
+ *
+ * Therefore it is possible to have parent and child groups in a
+ * different configuration and summing over such a beast makes no sense
+	 * whatsoever.
+ *
+ * Reject this.
+ */
+ parent = leader->parent;
+ if (parent &&
+ (parent->group_generation != leader->group_generation ||
+ parent->nr_siblings != leader->nr_siblings)) {
+ ret = -ECHILD;
+ goto unlock;
+ }
/*
* Since we co-schedule groups, {enabled,running} times of siblings
@@ -5483,8 +5559,9 @@ static int __perf_read_group_add(struct perf_event *leader,
values[n++] = atomic64_read(&sub->lost_samples);
}
+unlock:
raw_spin_unlock_irqrestore(&ctx->lock, flags);
- return 0;
+ return ret;
}
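From user space, the new check surfaces as read() failing with ECHILD on a PERF_FORMAT_GROUP read whose inherited child groups have diverged from the parent group. A hypothetical caller-side sketch, with the perf_event_open() set-up omitted:

/*
 * Hypothetical handling on the caller side: a group read can now fail
 * with ECHILD when parent and child group layouts no longer match, so
 * the values are treated as unavailable rather than summed incoherently.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static int read_group(int group_fd, uint64_t *buf, size_t bufsz)
{
	ssize_t n = read(group_fd, buf, bufsz);

	if (n < 0) {
		if (errno == ECHILD) {
			/* parent/child group layout diverged */
			fprintf(stderr, "group read rejected (ECHILD)\n");
			return -1;
		}
		perror("read");
		return -1;
	}
	return 0;
}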
static int perf_read_group(struct perf_event *event,
@@ -5503,10 +5580,6 @@ static int perf_read_group(struct perf_event *event,
values[0] = 1 + leader->nr_siblings;
- /*
- * By locking the child_mutex of the leader we effectively
- * lock the child list of all siblings.. XXX explain how.
- */
mutex_lock(&leader->child_mutex);
ret = __perf_read_group_add(leader, read_format, values);
@@ -8249,7 +8322,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
unsigned int size;
memset(comm, 0, sizeof(comm));
- strlcpy(comm, comm_event->task->comm, sizeof(comm));
+ strscpy(comm, comm_event->task->comm, sizeof(comm));
size = ALIGN(strlen(comm)+1, sizeof(u64));
comm_event->comm = comm;
@@ -8631,7 +8704,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
unsigned int size;
char tmp[16];
char *buf = NULL;
- char *name;
+ char *name = NULL;
if (vma->vm_flags & VM_READ)
prot |= PROT_READ;
@@ -8678,33 +8751,22 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
goto got_name;
} else {
- if (vma->vm_ops && vma->vm_ops->name) {
+ if (vma->vm_ops && vma->vm_ops->name)
name = (char *) vma->vm_ops->name(vma);
- if (name)
- goto cpy_name;
+ if (!name)
+ name = (char *)arch_vma_name(vma);
+ if (!name) {
+ if (vma_is_initial_heap(vma))
+ name = "[heap]";
+ else if (vma_is_initial_stack(vma))
+ name = "[stack]";
+ else
+ name = "//anon";
}
-
- name = (char *)arch_vma_name(vma);
- if (name)
- goto cpy_name;
-
- if (vma->vm_start <= vma->vm_mm->start_brk &&
- vma->vm_end >= vma->vm_mm->brk) {
- name = "[heap]";
- goto cpy_name;
- }
- if (vma->vm_start <= vma->vm_mm->start_stack &&
- vma->vm_end >= vma->vm_mm->start_stack) {
- name = "[stack]";
- goto cpy_name;
- }
-
- name = "//anon";
- goto cpy_name;
}
cpy_name:
- strlcpy(tmp, name, sizeof(tmp));
+ strscpy(tmp, name, sizeof(tmp));
name = tmp;
got_name:
/*
@@ -9128,7 +9190,7 @@ void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN)
goto err;
- strlcpy(name, sym, KSYM_NAME_LEN);
+ strscpy(name, sym, KSYM_NAME_LEN);
name_len = strlen(name) + 1;
while (!IS_ALIGNED(name_len, sizeof(u64)))
name[name_len++] = '\0';
@@ -9595,16 +9657,16 @@ u64 perf_swevent_set_period(struct perf_event *event)
hwc->last_period = hwc->sample_period;
-again:
- old = val = local64_read(&hwc->period_left);
- if (val < 0)
- return 0;
+ old = local64_read(&hwc->period_left);
+ do {
+ val = old;
+ if (val < 0)
+ return 0;
- nr = div64_u64(period + val, period);
- offset = nr * period;
- val -= offset;
- if (local64_cmpxchg(&hwc->period_left, old, val) != old)
- goto again;
+ nr = div64_u64(period + val, period);
+ offset = nr * period;
+ val -= offset;
+ } while (!local64_try_cmpxchg(&hwc->period_left, &old, val));
return nr;
}
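The conversion follows the usual try_cmpxchg idiom: on failure the expected value is refreshed in place, so the explicit re-read and the goto disappear. A user-space sketch of the same retry pattern with C11 atomics, for illustration only (local64_t and local64_try_cmpxchg() are kernel primitives):

/*
 * atomic_compare_exchange_weak() updates 'old' with the current value
 * when it fails, just as local64_try_cmpxchg() updates its 'old'
 * argument, so the loop body recomputes from the refreshed value.
 */
#include <stdatomic.h>
#include <stdint.h>

static uint64_t consume_period(_Atomic int64_t *period_left, int64_t period)
{
	int64_t old = atomic_load(period_left);
	int64_t val;
	uint64_t nr;

	do {
		val = old;
		if (val < 0)
			return 0;
		nr = (uint64_t)(period + val) / (uint64_t)period;
		val -= (int64_t)(nr * (uint64_t)period);
	} while (!atomic_compare_exchange_weak(period_left, &old, val));

	return nr;
}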
@@ -12857,6 +12919,9 @@ static void __perf_pmu_install_event(struct pmu *pmu,
int cpu, struct perf_event *event)
{
struct perf_event_pmu_context *epc;
+ struct perf_event_context *old_ctx = event->ctx;
+
+ get_ctx(ctx); /* normally find_get_context() */
event->cpu = cpu;
epc = find_get_pmu_context(pmu, ctx, event);
@@ -12865,6 +12930,11 @@ static void __perf_pmu_install_event(struct pmu *pmu,
if (event->state >= PERF_EVENT_STATE_OFF)
event->state = PERF_EVENT_STATE_INACTIVE;
perf_install_in_context(ctx, event, cpu);
+
+ /*
+ * Now that event->ctx is updated and visible, put the old ctx.
+ */
+ put_ctx(old_ctx);
}
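The reference ordering here mirrors a common re-parenting rule: pin the destination context before the move, publish the new pointer, and only then drop the source reference. A generic sketch with hypothetical types, not kernel code:

/*
 * Generic illustration of the get-before / put-after ordering used
 * above: neither context can be freed while the object is in flight.
 */
struct ctx {
	int refcount;	/* placeholder; the kernel uses refcount_t */
};

struct object {
	struct ctx *ctx;
};

static void get_ctx_ref(struct ctx *c) { c->refcount++; }
static void put_ctx_ref(struct ctx *c) { c->refcount--; /* free at zero */ }

static void move_object(struct object *obj, struct ctx *new_ctx)
{
	struct ctx *old_ctx = obj->ctx;

	get_ctx_ref(new_ctx);	/* pin destination first */
	obj->ctx = new_ctx;	/* publish the new parent */
	put_ctx_ref(old_ctx);	/* old parent may now go away */
}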
static void __perf_pmu_install(struct perf_event_context *ctx,
@@ -12903,6 +12973,10 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
struct perf_event_context *src_ctx, *dst_ctx;
LIST_HEAD(events);
+ /*
+ * Since per-cpu context is persistent, no need to grab an extra
+ * reference.
+ */
src_ctx = &per_cpu_ptr(&perf_cpu_context, src_cpu)->ctx;
dst_ctx = &per_cpu_ptr(&perf_cpu_context, dst_cpu)->ctx;
@@ -13357,6 +13431,8 @@ static int inherit_group(struct perf_event *parent_event,
!perf_get_aux_event(child_ctr, leader))
return -EINVAL;
}
+ if (leader)
+ leader->group_generation = parent_event->group_generation;
return 0;
}