aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/btf.c5
-rw-r--r--kernel/bpf/cgroup_iter.c2
-rw-r--r--kernel/bpf/dispatcher.c6
-rw-r--r--kernel/bpf/memalloc.c18
-rw-r--r--kernel/bpf/verifier.c1
-rw-r--r--kernel/cgroup/cgroup.c99
-rw-r--r--kernel/events/core.c152
-rw-r--r--kernel/events/ring_buffer.c2
-rw-r--r--kernel/gcov/gcc_4_7.c18
-rw-r--r--kernel/power/hibernate.c2
-rw-r--r--kernel/rcu/tree.c10
-rw-r--r--kernel/sched/core.c24
-rw-r--r--kernel/sched/deadline.c4
-rw-r--r--kernel/sched/rt.c4
-rw-r--r--kernel/sched/sched.h32
-rw-r--r--kernel/trace/blktrace.c82
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--kernel/utsname_sysctl.c1
18 files changed, 323 insertions, 141 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index eba603cec2c5..35c07afac924 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4436,6 +4436,11 @@ static int btf_func_proto_check(struct btf_verifier_env *env,
return -EINVAL;
}
+ if (btf_type_is_resolve_source_only(ret_type)) {
+ btf_verifier_log_type(env, t, "Invalid return type");
+ return -EINVAL;
+ }
+
if (btf_type_needs_resolve(ret_type) &&
!env_type_is_resolved(env, ret_type_id)) {
err = btf_resolve(env, ret_type, ret_type_id);
diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c
index 0d200a993489..9fcf09f2ef00 100644
--- a/kernel/bpf/cgroup_iter.c
+++ b/kernel/bpf/cgroup_iter.c
@@ -196,7 +196,7 @@ static int bpf_iter_attach_cgroup(struct bpf_prog *prog,
return -EINVAL;
if (fd)
- cgrp = cgroup_get_from_fd(fd);
+ cgrp = cgroup_v1v2_get_from_fd(fd);
else if (id)
cgrp = cgroup_get_from_id(id);
else /* walk the entire hierarchy by default. */
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index fa64b80b8bca..04f0a045dcaa 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -4,6 +4,7 @@
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/init.h>
/* The BPF dispatcher is a multiway branch code generator. The
* dispatcher is a mechanism to avoid the performance penalty of an
@@ -90,6 +91,11 @@ int __weak arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int n
return -ENOTSUPP;
}
+int __weak __init bpf_arch_init_dispatcher_early(void *ip)
+{
+ return -ENOTSUPP;
+}
+
static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf)
{
s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0];
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 5f83be1d2018..4901fa1048cd 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -418,14 +418,17 @@ static void drain_mem_cache(struct bpf_mem_cache *c)
/* No progs are using this bpf_mem_cache, but htab_map_free() called
* bpf_mem_cache_free() for all remaining elements and they can be in
* free_by_rcu or in waiting_for_gp lists, so drain those lists now.
+ *
+ * Except for waiting_for_gp list, there are no concurrent operations
+ * on these lists, so it is safe to use __llist_del_all().
*/
llist_for_each_safe(llnode, t, __llist_del_all(&c->free_by_rcu))
free_one(c, llnode);
llist_for_each_safe(llnode, t, llist_del_all(&c->waiting_for_gp))
free_one(c, llnode);
- llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist))
+ llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist))
free_one(c, llnode);
- llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist_extra))
+ llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist_extra))
free_one(c, llnode);
}
@@ -493,6 +496,16 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
rcu_in_progress = 0;
for_each_possible_cpu(cpu) {
c = per_cpu_ptr(ma->cache, cpu);
+ /*
+ * refill_work may be unfinished for PREEMPT_RT kernel
+ * in which irq work is invoked in a per-CPU RT thread.
+ * It is also possible for kernel with
+ * arch_irq_work_has_interrupt() being false and irq
+ * work is invoked in timer interrupt. So waiting for
+ * the completion of irq work to ease the handling of
+ * concurrency.
+ */
+ irq_work_sync(&c->refill_work);
drain_mem_cache(c);
rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
}
@@ -507,6 +520,7 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
cc = per_cpu_ptr(ma->caches, cpu);
for (i = 0; i < NUM_CACHES; i++) {
c = &cc->cache[i];
+ irq_work_sync(&c->refill_work);
drain_mem_cache(c);
rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 014ee0953dbd..7f0a9f6cb889 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6946,6 +6946,7 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_callback_fn = true;
+ callee->callback_ret_range = tnum_range(0, 1);
return 0;
}
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 7f486677ab1f..2319946715e0 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1392,6 +1392,9 @@ static void cgroup_destroy_root(struct cgroup_root *root)
cgroup_free_root(root);
}
+/*
+ * Returned cgroup is without refcount but it's valid as long as cset pins it.
+ */
static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
{
@@ -1403,6 +1406,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
res_cgroup = cset->dfl_cgrp;
} else {
struct cgrp_cset_link *link;
+ lockdep_assert_held(&css_set_lock);
list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
struct cgroup *c = link->cgrp;
@@ -1414,6 +1418,7 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset,
}
}
+ BUG_ON(!res_cgroup);
return res_cgroup;
}
@@ -1436,23 +1441,36 @@ current_cgns_cgroup_from_root(struct cgroup_root *root)
rcu_read_unlock();
- BUG_ON(!res);
return res;
}
+/*
+ * Look up cgroup associated with current task's cgroup namespace on the default
+ * hierarchy.
+ *
+ * Unlike current_cgns_cgroup_from_root(), this doesn't need locks:
+ * - Internal rcu_read_lock is unnecessary because we don't dereference any rcu
+ * pointers.
+ * - css_set_lock is not needed because we just read cset->dfl_cgrp.
+ * - As a bonus returned cgrp is pinned with the current because it cannot
+ * switch cgroup_ns asynchronously.
+ */
+static struct cgroup *current_cgns_cgroup_dfl(void)
+{
+ struct css_set *cset;
+
+ cset = current->nsproxy->cgroup_ns->root_cset;
+ return __cset_cgroup_from_root(cset, &cgrp_dfl_root);
+}
+
/* look up cgroup associated with given css_set on the specified hierarchy */
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
{
- struct cgroup *res = NULL;
-
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_lock);
- res = __cset_cgroup_from_root(cset, root);
-
- BUG_ON(!res);
- return res;
+ return __cset_cgroup_from_root(cset, root);
}
/*
@@ -6191,9 +6209,7 @@ struct cgroup *cgroup_get_from_id(u64 id)
if (!cgrp)
return ERR_PTR(-ENOENT);
- spin_lock_irq(&css_set_lock);
- root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
- spin_unlock_irq(&css_set_lock);
+ root_cgrp = current_cgns_cgroup_dfl();
if (!cgroup_is_descendant(cgrp, root_cgrp)) {
cgroup_put(cgrp);
return ERR_PTR(-ENOENT);
@@ -6294,16 +6310,42 @@ void cgroup_fork(struct task_struct *child)
INIT_LIST_HEAD(&child->cg_list);
}
-static struct cgroup *cgroup_get_from_file(struct file *f)
+/**
+ * cgroup_v1v2_get_from_file - get a cgroup pointer from a file pointer
+ * @f: file corresponding to cgroup_dir
+ *
+ * Find the cgroup from a file pointer associated with a cgroup directory.
+ * Returns a pointer to the cgroup on success. ERR_PTR is returned if the
+ * cgroup cannot be found.
+ */
+static struct cgroup *cgroup_v1v2_get_from_file(struct file *f)
{
struct cgroup_subsys_state *css;
- struct cgroup *cgrp;
css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
if (IS_ERR(css))
return ERR_CAST(css);
- cgrp = css->cgroup;
+ return css->cgroup;
+}
+
+/**
+ * cgroup_get_from_file - same as cgroup_v1v2_get_from_file, but only supports
+ * cgroup2.
+ * @f: file corresponding to cgroup2_dir
+ */
+static struct cgroup *cgroup_get_from_file(struct file *f)
+{
+ struct cgroup *cgrp = cgroup_v1v2_get_from_file(f);
+
+ if (IS_ERR(cgrp))
+ return ERR_CAST(cgrp);
+
+ if (!cgroup_on_dfl(cgrp)) {
+ cgroup_put(cgrp);
+ return ERR_PTR(-EBADF);
+ }
+
return cgrp;
}
@@ -6772,10 +6814,8 @@ struct cgroup *cgroup_get_from_path(const char *path)
struct cgroup *cgrp = ERR_PTR(-ENOENT);
struct cgroup *root_cgrp;
- spin_lock_irq(&css_set_lock);
- root_cgrp = current_cgns_cgroup_from_root(&cgrp_dfl_root);
+ root_cgrp = current_cgns_cgroup_dfl();
kn = kernfs_walk_and_get(root_cgrp->kn, path);
- spin_unlock_irq(&css_set_lock);
if (!kn)
goto out;
@@ -6800,15 +6840,15 @@ out:
EXPORT_SYMBOL_GPL(cgroup_get_from_path);
/**
- * cgroup_get_from_fd - get a cgroup pointer from a fd
- * @fd: fd obtained by open(cgroup2_dir)
+ * cgroup_v1v2_get_from_fd - get a cgroup pointer from a fd
+ * @fd: fd obtained by open(cgroup_dir)
*
* Find the cgroup from a fd which should be obtained
* by opening a cgroup directory. Returns a pointer to the
* cgroup on success. ERR_PTR is returned if the cgroup
* cannot be found.
*/
-struct cgroup *cgroup_get_from_fd(int fd)
+struct cgroup *cgroup_v1v2_get_from_fd(int fd)
{
struct cgroup *cgrp;
struct file *f;
@@ -6817,10 +6857,29 @@ struct cgroup *cgroup_get_from_fd(int fd)
if (!f)
return ERR_PTR(-EBADF);
- cgrp = cgroup_get_from_file(f);
+ cgrp = cgroup_v1v2_get_from_file(f);
fput(f);
return cgrp;
}
+
+/**
+ * cgroup_get_from_fd - same as cgroup_v1v2_get_from_fd, but only supports
+ * cgroup2.
+ * @fd: fd obtained by open(cgroup2_dir)
+ */
+struct cgroup *cgroup_get_from_fd(int fd)
+{
+ struct cgroup *cgrp = cgroup_v1v2_get_from_fd(fd);
+
+ if (IS_ERR(cgrp))
+ return ERR_CAST(cgrp);
+
+ if (!cgroup_on_dfl(cgrp)) {
+ cgroup_put(cgrp);
+ return ERR_PTR(-EBADF);
+ }
+ return cgrp;
+}
EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
static u64 power_of_ten(int power)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index aefc1e08e015..4ec3717003d5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -54,6 +54,7 @@
#include <linux/highmem.h>
#include <linux/pgtable.h>
#include <linux/buildid.h>
+#include <linux/task_work.h>
#include "internal.h"
@@ -2276,11 +2277,26 @@ event_sched_out(struct perf_event *event,
event->pmu->del(event, 0);
event->oncpu = -1;
- if (READ_ONCE(event->pending_disable) >= 0) {
- WRITE_ONCE(event->pending_disable, -1);
+ if (event->pending_disable) {
+ event->pending_disable = 0;
perf_cgroup_event_disable(event, ctx);
state = PERF_EVENT_STATE_OFF;
}
+
+ if (event->pending_sigtrap) {
+ bool dec = true;
+
+ event->pending_sigtrap = 0;
+ if (state != PERF_EVENT_STATE_OFF &&
+ !event->pending_work) {
+ event->pending_work = 1;
+ dec = false;
+ task_work_add(current, &event->pending_task, TWA_RESUME);
+ }
+ if (dec)
+ local_dec(&event->ctx->nr_pending);
+ }
+
perf_event_set_state(event, state);
if (!is_software_event(event))
@@ -2432,7 +2448,7 @@ static void __perf_event_disable(struct perf_event *event,
* hold the top-level event's child_mutex, so any descendant that
* goes to exit will block in perf_event_exit_event().
*
- * When called from perf_pending_event it's OK because event->ctx
+ * When called from perf_pending_irq it's OK because event->ctx
* is the current context on this CPU and preemption is disabled,
* hence we can't get into perf_event_task_sched_out for this context.
*/
@@ -2471,9 +2487,8 @@ EXPORT_SYMBOL_GPL(perf_event_disable);
void perf_event_disable_inatomic(struct perf_event *event)
{
- WRITE_ONCE(event->pending_disable, smp_processor_id());
- /* can fail, see perf_pending_event_disable() */
- irq_work_queue(&event->pending);
+ event->pending_disable = 1;
+ irq_work_queue(&event->pending_irq);
}
#define MAX_INTERRUPTS (~0ULL)
@@ -3428,11 +3443,23 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
if (context_equiv(ctx, next_ctx)) {
+ perf_pmu_disable(pmu);
+
+ /* PMIs are disabled; ctx->nr_pending is stable. */
+ if (local_read(&ctx->nr_pending) ||
+ local_read(&next_ctx->nr_pending)) {
+ /*
+ * Must not swap out ctx when there's pending
+ * events that rely on the ctx->task relation.
+ */
+ raw_spin_unlock(&next_ctx->lock);
+ rcu_read_unlock();
+ goto inside_switch;
+ }
+
WRITE_ONCE(ctx->task, next);
WRITE_ONCE(next_ctx->task, task);
- perf_pmu_disable(pmu);
-
if (cpuctx->sched_cb_usage && pmu->sched_task)
pmu->sched_task(ctx, false);
@@ -3473,6 +3500,7 @@ unlock:
raw_spin_lock(&ctx->lock);
perf_pmu_disable(pmu);
+inside_switch:
if (cpuctx->sched_cb_usage && pmu->sched_task)
pmu->sched_task(ctx, false);
task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
@@ -4939,7 +4967,7 @@ static void perf_addr_filters_splice(struct perf_event *event,
static void _free_event(struct perf_event *event)
{
- irq_work_sync(&event->pending);
+ irq_work_sync(&event->pending_irq);
unaccount_event(event);
@@ -6439,7 +6467,8 @@ static void perf_sigtrap(struct perf_event *event)
return;
/*
- * perf_pending_event() can race with the task exiting.
+ * Both perf_pending_task() and perf_pending_irq() can race with the
+ * task exiting.
*/
if (current->flags & PF_EXITING)
return;
@@ -6448,23 +6477,33 @@ static void perf_sigtrap(struct perf_event *event)
event->attr.type, event->attr.sig_data);
}
-static void perf_pending_event_disable(struct perf_event *event)
+/*
+ * Deliver the pending work in-event-context or follow the context.
+ */
+static void __perf_pending_irq(struct perf_event *event)
{
- int cpu = READ_ONCE(event->pending_disable);
+ int cpu = READ_ONCE(event->oncpu);
+ /*
+ * If the event isn't running; we done. event_sched_out() will have
+ * taken care of things.
+ */
if (cpu < 0)
return;
+ /*
+ * Yay, we hit home and are in the context of the event.
+ */
if (cpu == smp_processor_id()) {
- WRITE_ONCE(event->pending_disable, -1);
-
- if (event->attr.sigtrap) {
+ if (event->pending_sigtrap) {
+ event->pending_sigtrap = 0;
perf_sigtrap(event);
- atomic_set_release(&event->event_limit, 1); /* rearm event */
- return;
+ local_dec(&event->ctx->nr_pending);
+ }
+ if (event->pending_disable) {
+ event->pending_disable = 0;
+ perf_event_disable_local(event);
}
-
- perf_event_disable_local(event);
return;
}
@@ -6484,35 +6523,62 @@ static void perf_pending_event_disable(struct perf_event *event)
* irq_work_queue(); // FAILS
*
* irq_work_run()
- * perf_pending_event()
+ * perf_pending_irq()
*
* But the event runs on CPU-B and wants disabling there.
*/
- irq_work_queue_on(&event->pending, cpu);
+ irq_work_queue_on(&event->pending_irq, cpu);
}
-static void perf_pending_event(struct irq_work *entry)
+static void perf_pending_irq(struct irq_work *entry)
{
- struct perf_event *event = container_of(entry, struct perf_event, pending);
+ struct perf_event *event = container_of(entry, struct perf_event, pending_irq);
int rctx;
- rctx = perf_swevent_get_recursion_context();
/*
* If we 'fail' here, that's OK, it means recursion is already disabled
* and we won't recurse 'further'.
*/
+ rctx = perf_swevent_get_recursion_context();
- perf_pending_event_disable(event);
-
+ /*
+ * The wakeup isn't bound to the context of the event -- it can happen
+ * irrespective of where the event is.
+ */
if (event->pending_wakeup) {
event->pending_wakeup = 0;
perf_event_wakeup(event);
}
+ __perf_pending_irq(event);
+
if (rctx >= 0)
perf_swevent_put_recursion_context(rctx);
}
+static void perf_pending_task(struct callback_head *head)
+{
+ struct perf_event *event = container_of(head, struct perf_event, pending_task);
+ int rctx;
+
+ /*
+ * If we 'fail' here, that's OK, it means recursion is already disabled
+ * and we won't recurse 'further'.
+ */
+ preempt_disable_notrace();
+ rctx = perf_swevent_get_recursion_context();
+
+ if (event->pending_work) {
+ event->pending_work = 0;
+ perf_sigtrap(event);
+ local_dec(&event->ctx->nr_pending);
+ }
+
+ if (rctx >= 0)
+ perf_swevent_put_recursion_context(rctx);
+ preempt_enable_notrace();
+}
+
#ifdef CONFIG_GUEST_PERF_EVENTS
struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
@@ -9212,8 +9278,8 @@ int perf_event_account_interrupt(struct perf_event *event)
*/
static int __perf_event_overflow(struct perf_event *event,
- int throttle, struct perf_sample_data *data,
- struct pt_regs *regs)
+ int throttle, struct perf_sample_data *data,
+ struct pt_regs *regs)
{
int events = atomic_read(&event->event_limit);
int ret = 0;
@@ -9236,24 +9302,36 @@ static int __perf_event_overflow(struct perf_event *event,
if (events && atomic_dec_and_test(&event->event_limit)) {
ret = 1;
event->pending_kill = POLL_HUP;
- event->pending_addr = data->addr;
-
perf_event_disable_inatomic(event);
}
+ if (event->attr.sigtrap) {
+ /*
+ * Should not be able to return to user space without processing
+ * pending_sigtrap (kernel events can overflow multiple times).
+ */
+ WARN_ON_ONCE(event->pending_sigtrap && event->attr.exclude_kernel);
+ if (!event->pending_sigtrap) {
+ event->pending_sigtrap = 1;
+ local_inc(&event->ctx->nr_pending);
+ }
+ event->pending_addr = data->addr;
+ irq_work_queue(&event->pending_irq);
+ }
+
READ_ONCE(event->overflow_handler)(event, data, regs);
if (*perf_event_fasync(event) && event->pending_kill) {
event->pending_wakeup = 1;
- irq_work_queue(&event->pending);
+ irq_work_queue(&event->pending_irq);
}
return ret;
}
int perf_event_overflow(struct perf_event *event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
{
return __perf_event_overflow(event, 1, data, regs);
}
@@ -9768,6 +9846,7 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
perf_sample_data_init(&data, 0, 0);
data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
perf_trace_buf_update(record, event_type);
@@ -11570,8 +11649,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
init_waitqueue_head(&event->waitq);
- event->pending_disable = -1;
- init_irq_work(&event->pending, perf_pending_event);
+ init_irq_work(&event->pending_irq, perf_pending_irq);
+ init_task_work(&event->pending_task, perf_pending_task);
mutex_init(&event->mmap_mutex);
raw_spin_lock_init(&event->addr_filters.lock);
@@ -11593,9 +11672,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (parent_event)
event->event_caps = parent_event->event_caps;
- if (event->attr.sigtrap)
- atomic_set(&event->event_limit, 1);
-
if (task) {
event->attach_state = PERF_ATTACH_TASK;
/*
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 726132039c38..273a0fe7910a 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -22,7 +22,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
atomic_set(&handle->rb->poll, EPOLLIN);
handle->event->pending_wakeup = 1;
- irq_work_queue(&handle->event->pending);
+ irq_work_queue(&handle->event->pending_irq);
}
/*
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index 460c12b7dfea..7971e989e425 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -30,6 +30,13 @@
#define GCOV_TAG_FUNCTION_LENGTH 3
+/* Since GCC 12.1 sizes are in BYTES and not in WORDS (4B). */
+#if (__GNUC__ >= 12)
+#define GCOV_UNIT_SIZE 4
+#else
+#define GCOV_UNIT_SIZE 1
+#endif
+
static struct gcov_info *gcov_info_head;
/**
@@ -383,12 +390,18 @@ size_t convert_to_gcda(char *buffer, struct gcov_info *info)
pos += store_gcov_u32(buffer, pos, info->version);
pos += store_gcov_u32(buffer, pos, info->stamp);
+#if (__GNUC__ >= 12)
+ /* Use zero as checksum of the compilation unit. */
+ pos += store_gcov_u32(buffer, pos, 0);
+#endif
+
for (fi_idx = 0; fi_idx < info->n_functions; fi_idx++) {
fi_ptr = info->functions[fi_idx];
/* Function record. */
pos += store_gcov_u32(buffer, pos, GCOV_TAG_FUNCTION);
- pos += store_gcov_u32(buffer, pos, GCOV_TAG_FUNCTION_LENGTH);
+ pos += store_gcov_u32(buffer, pos,
+ GCOV_TAG_FUNCTION_LENGTH * GCOV_UNIT_SIZE);
pos += store_gcov_u32(buffer, pos, fi_ptr->ident);
pos += store_gcov_u32(buffer, pos, fi_ptr->lineno_checksum);
pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum);
@@ -402,7 +415,8 @@ size_t convert_to_gcda(char *buffer, struct gcov_info *info)
/* Counter record. */
pos += store_gcov_u32(buffer, pos,
GCOV_TAG_FOR_COUNTER(ct_idx));
- pos += store_gcov_u32(buffer, pos, ci_ptr->num * 2);
+ pos += store_gcov_u32(buffer, pos,
+ ci_ptr->num * 2 * GCOV_UNIT_SIZE);
for (cv_idx = 0; cv_idx < ci_ptr->num; cv_idx++) {
pos += store_gcov_u64(buffer, pos,
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index f58a0aa92310..793c55a2becb 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -645,7 +645,7 @@ static void power_down(void)
int error;
if (hibernation_mode == HIBERNATION_SUSPEND) {
- error = suspend_devices_and_enter(PM_SUSPEND_MEM);
+ error = suspend_devices_and_enter(mem_sleep_current);
if (error) {
hibernation_mode = hibernation_ops ?
HIBERNATION_PLATFORM :
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 6bb8e72bc815..93416afebd59 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1403,30 +1403,32 @@ static void rcu_poll_gp_seq_end(unsigned long *snap)
// where caller does not hold the root rcu_node structure's lock.
static void rcu_poll_gp_seq_start_unlocked(unsigned long *snap)
{
+ unsigned long flags;
struct rcu_node *rnp = rcu_get_root();
if (rcu_init_invoked()) {
lockdep_assert_irqs_enabled();
- raw_spin_lock_irq_rcu_node(rnp);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
}
rcu_poll_gp_seq_start(snap);
if (rcu_init_invoked())
- raw_spin_unlock_irq_rcu_node(rnp);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
// Make the polled API aware of the end of a grace period, but where
// caller does not hold the root rcu_node structure's lock.
static void rcu_poll_gp_seq_end_unlocked(unsigned long *snap)
{
+ unsigned long flags;
struct rcu_node *rnp = rcu_get_root();
if (rcu_init_invoked()) {
lockdep_assert_irqs_enabled();
- raw_spin_lock_irq_rcu_node(rnp);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
}
rcu_poll_gp_seq_end(snap);
if (rcu_init_invoked())
- raw_spin_unlock_irq_rcu_node(rnp);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5800b0623ff3..cb2aa2b54c7a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4823,10 +4823,10 @@ static inline void finish_task(struct task_struct *prev)
#ifdef CONFIG_SMP
-static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
+static void do_balance_callbacks(struct rq *rq, struct balance_callback *head)
{
void (*func)(struct rq *rq);
- struct callback_head *next;
+ struct balance_callback *next;
lockdep_assert_rq_held(rq);
@@ -4853,15 +4853,15 @@ static void balance_push(struct rq *rq);
* This abuse is tolerated because it places all the unlikely/odd cases behind
* a single test, namely: rq->balance_callback == NULL.
*/
-struct callback_head balance_push_callback = {
+struct balance_callback balance_push_callback = {
.next = NULL,
- .func = (void (*)(struct callback_head *))balance_push,
+ .func = balance_push,
};
-static inline struct callback_head *
+static inline struct balance_callback *
__splice_balance_callbacks(struct rq *rq, bool split)
{
- struct callback_head *head = rq->balance_callback;
+ struct balance_callback *head = rq->balance_callback;
if (likely(!head))
return NULL;
@@ -4883,7 +4883,7 @@ __splice_balance_callbacks(struct rq *rq, bool split)
return head;
}
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+static inline struct balance_callback *splice_balance_callbacks(struct rq *rq)
{
return __splice_balance_callbacks(rq, true);
}
@@ -4893,7 +4893,7 @@ static void __balance_callbacks(struct rq *rq)
do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
}
-static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
+static inline void balance_callbacks(struct rq *rq, struct balance_callback *head)
{
unsigned long flags;
@@ -4910,12 +4910,12 @@ static inline void __balance_callbacks(struct rq *rq)
{
}
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+static inline struct balance_callback *splice_balance_callbacks(struct rq *rq)
{
return NULL;
}
-static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
+static inline void balance_callbacks(struct rq *rq, struct balance_callback *head)
{
}
@@ -6188,7 +6188,7 @@ static void sched_core_balance(struct rq *rq)
preempt_enable();
}
-static DEFINE_PER_CPU(struct callback_head, core_balance_head);
+static DEFINE_PER_CPU(struct balance_callback, core_balance_head);
static void queue_core_balance(struct rq *rq)
{
@@ -7419,7 +7419,7 @@ static int __sched_setscheduler(struct task_struct *p,
int oldpolicy = -1, policy = attr->sched_policy;
int retval, oldprio, newprio, queued, running;
const struct sched_class *prev_class;
- struct callback_head *head;
+ struct balance_callback *head;
struct rq_flags rf;
int reset_on_fork;
int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 86dea6a05267..9ae8f41e3372 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -644,8 +644,8 @@ static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
return rq->online && dl_task(prev);
}
-static DEFINE_PER_CPU(struct callback_head, dl_push_head);
-static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
+static DEFINE_PER_CPU(struct balance_callback, dl_push_head);
+static DEFINE_PER_CPU(struct balance_callback, dl_pull_head);
static void push_dl_tasks(struct rq *);
static void pull_dl_task(struct rq *);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d869bcf898cc..ed2a47e4ddae 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -410,8 +410,8 @@ static inline int has_pushable_tasks(struct rq *rq)
return !plist_head_empty(&rq->rt.pushable_tasks);
}
-static DEFINE_PER_CPU(struct callback_head, rt_push_head);
-static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
+static DEFINE_PER_CPU(struct balance_callback, rt_push_head);
+static DEFINE_PER_CPU(struct balance_callback, rt_pull_head);
static void push_rt_tasks(struct rq *);
static void pull_rt_task(struct rq *);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1644242ecd11..a4a20046e586 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -938,6 +938,12 @@ struct uclamp_rq {
DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
#endif /* CONFIG_UCLAMP_TASK */
+struct rq;
+struct balance_callback {
+ struct balance_callback *next;
+ void (*func)(struct rq *rq);
+};
+
/*
* This is the main, per-CPU runqueue data structure.
*
@@ -1036,7 +1042,7 @@ struct rq {
unsigned long cpu_capacity;
unsigned long cpu_capacity_orig;
- struct callback_head *balance_callback;
+ struct balance_callback *balance_callback;
unsigned char nohz_idle_balance;
unsigned char idle_balance;
@@ -1182,6 +1188,14 @@ static inline bool is_migration_disabled(struct task_struct *p)
#endif
}
+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+
+#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
+#define this_rq() this_cpu_ptr(&runqueues)
+#define task_rq(p) cpu_rq(task_cpu(p))
+#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
+#define raw_rq() raw_cpu_ptr(&runqueues)
+
struct sched_group;
#ifdef CONFIG_SCHED_CORE
static inline struct cpumask *sched_group_span(struct sched_group *sg);
@@ -1269,7 +1283,7 @@ static inline bool sched_group_cookie_match(struct rq *rq,
return true;
for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) {
- if (sched_core_cookie_match(rq, p))
+ if (sched_core_cookie_match(cpu_rq(cpu), p))
return true;
}
return false;
@@ -1384,14 +1398,6 @@ static inline void update_idle_core(struct rq *rq)
static inline void update_idle_core(struct rq *rq) { }
#endif
-DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-
-#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
-#define this_rq() this_cpu_ptr(&runqueues)
-#define task_rq(p) cpu_rq(task_cpu(p))
-#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
-#define raw_rq() raw_cpu_ptr(&runqueues)
-
#ifdef CONFIG_FAIR_GROUP_SCHED
static inline struct task_struct *task_of(struct sched_entity *se)
{
@@ -1544,7 +1550,7 @@ struct rq_flags {
#endif
};
-extern struct callback_head balance_push_callback;
+extern struct balance_callback balance_push_callback;
/*
* Lockdep annotation that avoids accidental unlocks; it's like a
@@ -1724,7 +1730,7 @@ init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
static inline void
queue_balance_callback(struct rq *rq,
- struct callback_head *head,
+ struct balance_callback *head,
void (*func)(struct rq *rq))
{
lockdep_assert_rq_held(rq);
@@ -1737,7 +1743,7 @@ queue_balance_callback(struct rq *rq,
if (unlikely(head->next || rq->balance_callback == &balance_push_callback))
return;
- head->func = (void (*)(struct callback_head *))func;
+ head->func = func;
head->next = rq->balance_callback;
rq->balance_callback = head;
}
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7f5eb295fe19..a995ea1ef849 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -346,8 +346,40 @@ static void put_probe_ref(void)
mutex_unlock(&blk_probe_mutex);
}
+static int blk_trace_start(struct blk_trace *bt)
+{
+ if (bt->trace_state != Blktrace_setup &&
+ bt->trace_state != Blktrace_stopped)
+ return -EINVAL;
+
+ blktrace_seq++;
+ smp_mb();
+ bt->trace_state = Blktrace_running;
+ raw_spin_lock_irq(&running_trace_lock);
+ list_add(&bt->running_list, &running_trace_list);
+ raw_spin_unlock_irq(&running_trace_lock);
+ trace_note_time(bt);
+
+ return 0;
+}
+
+static int blk_trace_stop(struct blk_trace *bt)
+{
+ if (bt->trace_state != Blktrace_running)
+ return -EINVAL;
+
+ bt->trace_state = Blktrace_stopped;
+ raw_spin_lock_irq(&running_trace_lock);
+ list_del_init(&bt->running_list);
+ raw_spin_unlock_irq(&running_trace_lock);
+ relay_flush(bt->rchan);
+
+ return 0;
+}
+
static void blk_trace_cleanup(struct request_queue *q, struct blk_trace *bt)
{
+ blk_trace_stop(bt);
synchronize_rcu();
blk_trace_free(q, bt);
put_probe_ref();
@@ -362,8 +394,7 @@ static int __blk_trace_remove(struct request_queue *q)
if (!bt)
return -EINVAL;
- if (bt->trace_state != Blktrace_running)
- blk_trace_cleanup(q, bt);
+ blk_trace_cleanup(q, bt);
return 0;
}
@@ -658,7 +689,6 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
static int __blk_trace_startstop(struct request_queue *q, int start)
{
- int ret;
struct blk_trace *bt;
bt = rcu_dereference_protected(q->blk_trace,
@@ -666,36 +696,10 @@ static int __blk_trace_startstop(struct request_queue *q, int start)
if (bt == NULL)
return -EINVAL;
- /*
- * For starting a trace, we can transition from a setup or stopped
- * trace. For stopping a trace, the state must be running
- */
- ret = -EINVAL;
- if (start) {
- if (bt->trace_state == Blktrace_setup ||
- bt->trace_state == Blktrace_stopped) {
- blktrace_seq++;
- smp_mb();
- bt->trace_state = Blktrace_running;
- raw_spin_lock_irq(&running_trace_lock);
- list_add(&bt->running_list, &running_trace_list);
- raw_spin_unlock_irq(&running_trace_lock);
-
- trace_note_time(bt);
- ret = 0;
- }
- } else {
- if (bt->trace_state == Blktrace_running) {
- bt->trace_state = Blktrace_stopped;
- raw_spin_lock_irq(&running_trace_lock);
- list_del_init(&bt->running_list);
- raw_spin_unlock_irq(&running_trace_lock);
- relay_flush(bt->rchan);
- ret = 0;
- }
- }
-
- return ret;
+ if (start)
+ return blk_trace_start(bt);
+ else
+ return blk_trace_stop(bt);
}
int blk_trace_startstop(struct request_queue *q, int start)
@@ -772,10 +776,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
void blk_trace_shutdown(struct request_queue *q)
{
if (rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->debugfs_mutex))) {
- __blk_trace_startstop(q, 0);
+ lockdep_is_held(&q->debugfs_mutex)))
__blk_trace_remove(q);
- }
}
#ifdef CONFIG_BLK_CGROUP
@@ -1614,13 +1616,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
if (bt == NULL)
return -EINVAL;
- if (bt->trace_state == Blktrace_running) {
- bt->trace_state = Blktrace_stopped;
- raw_spin_lock_irq(&running_trace_lock);
- list_del_init(&bt->running_list);
- raw_spin_unlock_irq(&running_trace_lock);
- relay_flush(bt->rchan);
- }
+ blk_trace_stop(bt);
put_probe_ref();
synchronize_rcu();
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 49fb9ec8366d..1ed08967fb97 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -687,6 +687,7 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
perf_sample_data_init(sd, 0, 0);
sd->raw = &raw;
+ sd->sample_flags |= PERF_SAMPLE_RAW;
err = __bpf_perf_event_output(regs, map, flags, sd);
@@ -745,6 +746,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
perf_fetch_caller_regs(regs);
perf_sample_data_init(sd, 0, 0);
sd->raw = &raw;
+ sd->sample_flags |= PERF_SAMPLE_RAW;
ret = __bpf_perf_event_output(regs, map, flags, sd);
out:
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 064072c16e3d..f50398cb790d 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -74,6 +74,7 @@ static int proc_do_uts_string(struct ctl_table *table, int write,
static DEFINE_CTL_TABLE_POLL(hostname_poll);
static DEFINE_CTL_TABLE_POLL(domainname_poll);
+// Note: update 'enum uts_proc' to match any changes to this table
static struct ctl_table uts_kern_table[] = {
{
.procname = "arch",