Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                  |  10
-rw-r--r--  kernel/trace/bpf_trace.c              | 116
-rw-r--r--  kernel/trace/fgraph.c                 | 155
-rw-r--r--  kernel/trace/ftrace.c                 | 118
-rw-r--r--  kernel/trace/ring_buffer.c            | 102
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c  |   4
-rw-r--r--  kernel/trace/rv/rv.c                  |   2
-rw-r--r--  kernel/trace/trace.c                  |  96
-rw-r--r--  kernel/trace/trace.h                  |  22
-rw-r--r--  kernel/trace/trace_branch.c           |  10
-rw-r--r--  kernel/trace/trace_clock.c            |   2
-rw-r--r--  kernel/trace/trace_entries.h          |  29
-rw-r--r--  kernel/trace/trace_event_perf.c       |   6
-rw-r--r--  kernel/trace/trace_events.c           |   2
-rw-r--r--  kernel/trace/trace_events_filter.c    |   8
-rw-r--r--  kernel/trace/trace_events_hist.c      |  11
-rw-r--r--  kernel/trace/trace_events_user.c      |   4
-rw-r--r--  kernel/trace/trace_functions.c        |  36
-rw-r--r--  kernel/trace/trace_functions_graph.c  | 272
-rw-r--r--  kernel/trace/trace_hwlat.c            |   4
-rw-r--r--  kernel/trace/trace_kdb.c              |  13
-rw-r--r--  kernel/trace/trace_mmiotrace.c        |   8
-rw-r--r--  kernel/trace/trace_osnoise.c          |  12
-rw-r--r--  kernel/trace/trace_output.c           |   5
-rw-r--r--  kernel/trace/trace_preemptirq.c       |  26
-rw-r--r--  kernel/trace/trace_sched_switch.c     |   2
-rw-r--r--  kernel/trace/trace_sched_wakeup.c     |   8
-rw-r--r--  kernel/trace/trace_selftest.c         |   1
-rw-r--r--  kernel/trace/trace_syscalls.c         |  28
-rw-r--r--  kernel/trace/trace_uprobe.c           |  12
30 files changed, 775 insertions, 349 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 721c3b221048..74c2b1d43bb9 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -242,6 +242,16 @@ config FUNCTION_GRAPH_RETVAL
enable it via the trace option funcgraph-retval.
See Documentation/trace/ftrace.rst
+config FUNCTION_GRAPH_RETADDR
+ bool "Kernel Function Graph Return Address"
+ depends on FUNCTION_GRAPH_TRACER
+ default n
+ help
+ Support recording and printing the function return address when
+ using the function graph tracer. It can be helpful to locate the code
+ line from which the function is called. This feature is off by default,
+ and you can enable it via the trace option funcgraph-retaddr.
+
config DYNAMIC_FTRACE
bool "enable/disable function tracing dynamically"
depends on FUNCTION_TRACER
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 630b763e5240..949a3870946c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -802,6 +802,8 @@ struct send_signal_irq_work {
struct task_struct *task;
u32 sig;
enum pid_type type;
+ bool has_siginfo;
+ struct kernel_siginfo info;
};
static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
@@ -809,27 +811,46 @@ static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
static void do_bpf_send_signal(struct irq_work *entry)
{
struct send_signal_irq_work *work;
+ struct kernel_siginfo *siginfo;
work = container_of(entry, struct send_signal_irq_work, irq_work);
- group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
+ siginfo = work->has_siginfo ? &work->info : SEND_SIG_PRIV;
+
+ group_send_sig_info(work->sig, siginfo, work->task, work->type);
put_task_struct(work->task);
}
-static int bpf_send_signal_common(u32 sig, enum pid_type type)
+static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struct *task, u64 value)
{
struct send_signal_irq_work *work = NULL;
+ struct kernel_siginfo info;
+ struct kernel_siginfo *siginfo;
+
+ if (!task) {
+ task = current;
+ siginfo = SEND_SIG_PRIV;
+ } else {
+ clear_siginfo(&info);
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_code = SI_KERNEL;
+ info.si_pid = 0;
+ info.si_uid = 0;
+ info.si_value.sival_ptr = (void *)(unsigned long)value;
+ siginfo = &info;
+ }
/* Similar to bpf_probe_write_user, task needs to be
* in a sound condition and kernel memory access be
* permitted in order to send signal to the current
* task.
*/
- if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
+ if (unlikely(task->flags & (PF_KTHREAD | PF_EXITING)))
return -EPERM;
if (unlikely(!nmi_uaccess_okay()))
return -EPERM;
/* Task should not be pid=1 to avoid kernel panic. */
- if (unlikely(is_global_init(current)))
+ if (unlikely(is_global_init(task)))
return -EPERM;
if (irqs_disabled()) {
@@ -847,19 +868,22 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
* to the irq_work. The current task may change when queued
* irq works get executed.
*/
- work->task = get_task_struct(current);
+ work->task = get_task_struct(task);
+ work->has_siginfo = siginfo == &info;
+ if (work->has_siginfo)
+ copy_siginfo(&work->info, &info);
work->sig = sig;
work->type = type;
irq_work_queue(&work->irq_work);
return 0;
}
- return group_send_sig_info(sig, SEND_SIG_PRIV, current, type);
+ return group_send_sig_info(sig, siginfo, task, type);
}
BPF_CALL_1(bpf_send_signal, u32, sig)
{
- return bpf_send_signal_common(sig, PIDTYPE_TGID);
+ return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0);
}
static const struct bpf_func_proto bpf_send_signal_proto = {
@@ -871,7 +895,7 @@ static const struct bpf_func_proto bpf_send_signal_proto = {
BPF_CALL_1(bpf_send_signal_thread, u32, sig)
{
- return bpf_send_signal_common(sig, PIDTYPE_PID);
+ return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0);
}
static const struct bpf_func_proto bpf_send_signal_thread_proto = {
@@ -1557,6 +1581,17 @@ static inline bool is_kprobe_session(const struct bpf_prog *prog)
return prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
}
+static inline bool is_uprobe_multi(const struct bpf_prog *prog)
+{
+ return prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI ||
+ prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
+}
+
+static inline bool is_uprobe_session(const struct bpf_prog *prog)
+{
+ return prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
+}
+
static const struct bpf_func_proto *
kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -1574,13 +1609,13 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_func_ip:
if (is_kprobe_multi(prog))
return &bpf_get_func_ip_proto_kprobe_multi;
- if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
+ if (is_uprobe_multi(prog))
return &bpf_get_func_ip_proto_uprobe_multi;
return &bpf_get_func_ip_proto_kprobe;
case BPF_FUNC_get_attach_cookie:
if (is_kprobe_multi(prog))
return &bpf_get_attach_cookie_proto_kmulti;
- if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
+ if (is_uprobe_multi(prog))
return &bpf_get_attach_cookie_proto_umulti;
return &bpf_get_attach_cookie_proto_trace;
default:
@@ -3072,6 +3107,7 @@ struct bpf_uprobe {
u64 cookie;
struct uprobe *uprobe;
struct uprobe_consumer consumer;
+ bool session;
};
struct bpf_uprobe_multi_link {
@@ -3084,7 +3120,7 @@ struct bpf_uprobe_multi_link {
};
struct bpf_uprobe_multi_run_ctx {
- struct bpf_run_ctx run_ctx;
+ struct bpf_session_run_ctx session_ctx;
unsigned long entry_ip;
struct bpf_uprobe *uprobe;
};
@@ -3195,17 +3231,22 @@ static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
static int uprobe_prog_run(struct bpf_uprobe *uprobe,
unsigned long entry_ip,
- struct pt_regs *regs)
+ struct pt_regs *regs,
+ bool is_return, void *data)
{
struct bpf_uprobe_multi_link *link = uprobe->link;
struct bpf_uprobe_multi_run_ctx run_ctx = {
+ .session_ctx = {
+ .is_return = is_return,
+ .data = data,
+ },
.entry_ip = entry_ip,
.uprobe = uprobe,
};
struct bpf_prog *prog = link->link.prog;
bool sleepable = prog->sleepable;
struct bpf_run_ctx *old_run_ctx;
- int err = 0;
+ int err;
if (link->task && !same_thread_group(current, link->task))
return 0;
@@ -3217,7 +3258,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
migrate_disable();
- old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
@@ -3240,28 +3281,36 @@ uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm)
}
static int
-uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
+uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs,
+ __u64 *data)
{
struct bpf_uprobe *uprobe;
+ int ret;
uprobe = container_of(con, struct bpf_uprobe, consumer);
- return uprobe_prog_run(uprobe, instruction_pointer(regs), regs);
+ ret = uprobe_prog_run(uprobe, instruction_pointer(regs), regs, false, data);
+ if (uprobe->session)
+ return ret ? UPROBE_HANDLER_IGNORE : 0;
+ return 0;
}
static int
-uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs)
+uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs,
+ __u64 *data)
{
struct bpf_uprobe *uprobe;
uprobe = container_of(con, struct bpf_uprobe, consumer);
- return uprobe_prog_run(uprobe, func, regs);
+ uprobe_prog_run(uprobe, func, regs, true, data);
+ return 0;
}
static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
struct bpf_uprobe_multi_run_ctx *run_ctx;
- run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx);
+ run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
+ session_ctx.run_ctx);
return run_ctx->entry_ip;
}
@@ -3269,7 +3318,8 @@ static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
struct bpf_uprobe_multi_run_ctx *run_ctx;
- run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx);
+ run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
+ session_ctx.run_ctx);
return run_ctx->uprobe->cookie;
}
@@ -3293,7 +3343,7 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (sizeof(u64) != sizeof(void *))
return -EOPNOTSUPP;
- if (prog->expected_attach_type != BPF_TRACE_UPROBE_MULTI)
+ if (!is_uprobe_multi(prog))
return -EINVAL;
flags = attr->link_create.uprobe_multi.flags;
@@ -3369,11 +3419,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
uprobes[i].link = link;
- if (flags & BPF_F_UPROBE_MULTI_RETURN)
- uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
- else
+ if (!(flags & BPF_F_UPROBE_MULTI_RETURN))
uprobes[i].consumer.handler = uprobe_multi_link_handler;
-
+ if (flags & BPF_F_UPROBE_MULTI_RETURN || is_uprobe_session(prog))
+ uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
+ if (is_uprobe_session(prog))
+ uprobes[i].session = true;
if (pid)
uprobes[i].consumer.filter = uprobe_multi_link_filter;
}
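
The hunks above wire up BPF_TRACE_UPROBE_SESSION: the same BPF program is installed as both the entry handler and the return handler, a per-invocation __u64 cookie is threaded through as *data, and a non-zero return from the entry run is translated into UPROBE_HANDLER_IGNORE so the return-side run is skipped for that hit. The filter change just below additionally opens the session kfuncs up to uprobe sessions. A minimal BPF-side sketch of such a session program (illustration only, not part of this patch; the SEC() name, target binary and symbol are placeholders, and availability of the session kfuncs to this program type is assumed):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* session kfuncs, assumed to be usable from uprobe session programs */
extern bool bpf_session_is_return(void) __ksym;
extern __u64 *bpf_session_cookie(void) __ksym;

char LICENSE[] SEC("license") = "GPL";

/* hypothetical target binary and symbol */
SEC("uprobe.session//usr/bin/example:main")
int trace_main(struct pt_regs *ctx)
{
	__u64 *cookie = bpf_session_cookie();

	if (bpf_session_is_return()) {
		bpf_printk("main returned, entry ts=%llu", cookie ? *cookie : 0);
		return 0;
	}

	if (cookie)
		*cookie = bpf_ktime_get_ns();

	/* returning non-zero here would make the kernel skip the return run */
	return 0;
}
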
@@ -3462,7 +3513,7 @@ static int bpf_kprobe_multi_filter(const struct bpf_prog *prog, u32 kfunc_id)
if (!btf_id_set8_contains(&kprobe_multi_kfunc_set_ids, kfunc_id))
return 0;
- if (!is_kprobe_session(prog))
+ if (!is_kprobe_session(prog) && !is_uprobe_session(prog))
return -EACCES;
return 0;
@@ -3480,3 +3531,16 @@ static int __init bpf_kprobe_multi_kfuncs_init(void)
}
late_initcall(bpf_kprobe_multi_kfuncs_init);
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type,
+ u64 value)
+{
+ if (type != PIDTYPE_PID && type != PIDTYPE_TGID)
+ return -EINVAL;
+
+ return bpf_send_signal_common(sig, type, task, value);
+}
+
+__bpf_kfunc_end_defs();
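
The new bpf_send_signal_task() kfunc generalizes bpf_send_signal()/bpf_send_signal_thread(): the caller can name an arbitrary task instead of current, and the extra u64 is carried to the receiver in siginfo's si_value. A hedged BPF-side sketch (illustration only; the tracepoint, signal number, and kfunc registration for this program type are assumptions):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

extern int bpf_send_signal_task(struct task_struct *task, int sig,
				enum pid_type type, u64 value) __ksym;

char LICENSE[] SEC("license") = "GPL";

#define SIGUSR1 10	/* asm-generic value, for illustration only */

SEC("tp_btf/sched_process_fork")
int BPF_PROG(notify_on_fork, struct task_struct *parent, struct task_struct *child)
{
	/* signal the parent, carrying the new child's pid in si_value */
	bpf_send_signal_task(parent, SIGUSR1, PIDTYPE_TGID, (u64)child->pid);
	return 0;
}
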
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 69e226a48daa..0bf78517b5d4 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -153,7 +153,7 @@ enum {
* SHADOW_STACK_OFFSET: The size in long words of the shadow stack
* SHADOW_STACK_MAX_OFFSET: The max offset of the stack for a new frame to be added
*/
-#define SHADOW_STACK_SIZE (PAGE_SIZE)
+#define SHADOW_STACK_SIZE (4096)
#define SHADOW_STACK_OFFSET (SHADOW_STACK_SIZE / sizeof(long))
/* Leave on a buffer at the end */
#define SHADOW_STACK_MAX_OFFSET \
@@ -172,6 +172,8 @@ enum {
DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph);
int ftrace_graph_active;
+static struct kmem_cache *fgraph_stack_cachep;
+
static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE];
static unsigned long fgraph_array_bitmask;
@@ -390,21 +392,7 @@ void *fgraph_reserve_data(int idx, int size_bytes)
*/
void *fgraph_retrieve_data(int idx, int *size_bytes)
{
- int offset = current->curr_ret_stack - 1;
- unsigned long val;
-
- val = get_fgraph_entry(current, offset);
- while (__get_type(val) == FGRAPH_TYPE_DATA) {
- if (__get_data_index(val) == idx)
- goto found;
- offset -= __get_data_size(val) + 1;
- val = get_fgraph_entry(current, offset);
- }
- return NULL;
-found:
- if (size_bytes)
- *size_bytes = __get_data_size(val) * sizeof(long);
- return get_data_type_data(current, offset);
+ return fgraph_retrieve_parent_data(idx, size_bytes, 0);
}
/**
@@ -460,8 +448,56 @@ get_ret_stack(struct task_struct *t, int offset, int *frame_offset)
return RET_STACK(t, offset);
}
+/**
+ * fgraph_retrieve_parent_data - get data from a parent function
+ * @idx: The index into the fgraph_array (fgraph_ops::idx)
+ * @size_bytes: A pointer to retrieved data size
+ * @depth: The depth to find the parent (0 is the current function)
+ *
+ * This is similar to fgraph_retrieve_data() but can be used to retrieve
+ * data from a parent caller function.
+ *
+ * Return: a pointer to the specified parent data or NULL if not found
+ */
+void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth)
+{
+ struct ftrace_ret_stack *ret_stack = NULL;
+ int offset = current->curr_ret_stack;
+ unsigned long val;
+
+ if (offset <= 0)
+ return NULL;
+
+ for (;;) {
+ int next_offset;
+
+ ret_stack = get_ret_stack(current, offset, &next_offset);
+ if (!ret_stack || --depth < 0)
+ break;
+ offset = next_offset;
+ }
+
+ if (!ret_stack)
+ return NULL;
+
+ offset--;
+
+ val = get_fgraph_entry(current, offset);
+ while (__get_type(val) == FGRAPH_TYPE_DATA) {
+ if (__get_data_index(val) == idx)
+ goto found;
+ offset -= __get_data_size(val) + 1;
+ val = get_fgraph_entry(current, offset);
+ }
+ return NULL;
+found:
+ if (size_bytes)
+ *size_bytes = __get_data_size(val) * sizeof(long);
+ return get_data_type_data(current, offset);
+}
+
/* Both enabled by default (can be cleared by function_graph tracer flags */
-static bool fgraph_sleep_time = true;
+bool fgraph_sleep_time = true;
#ifdef CONFIG_DYNAMIC_FTRACE
/*
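
With the change above, fgraph_retrieve_data() becomes a wrapper around the new fgraph_retrieve_parent_data(), which first walks @depth frames up the shadow stack and then looks for the per-callback data words reserved there. A rough sketch of the intended pattern, mirroring how the function profiler in ftrace.c (further down in this diff) uses it; the callback and struct names are made up:

#include <linux/ftrace.h>
#include <linux/trace_clock.h>

struct my_frame_data {
	u64	calltime;
	u64	subtime;	/* time spent in children */
};

/* entry: reserve per-frame data on the shadow stack and stamp it */
static int my_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops)
{
	struct my_frame_data *data;

	data = fgraph_reserve_data(gops->idx, sizeof(*data));
	if (!data)
		return 0;

	data->subtime = 0;
	data->calltime = trace_clock_local();
	return 1;
}

/* return: charge this call's duration to the caller's frame */
static void my_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops)
{
	struct my_frame_data *data, *parent;
	u64 delta;
	int size;

	data = fgraph_retrieve_data(gops->idx, &size);
	if (!data)
		return;

	delta = trace_clock_local() - data->calltime;

	/* depth 1 selects the parent (calling) function's frame */
	parent = fgraph_retrieve_parent_data(gops->idx, &size, 1);
	if (parent)
		parent->subtime += delta;
}
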
@@ -524,7 +560,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
int fgraph_idx)
{
struct ftrace_ret_stack *ret_stack;
- unsigned long long calltime;
unsigned long val;
int offset;
@@ -554,8 +589,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
return -EBUSY;
}
- calltime = trace_clock_local();
-
offset = READ_ONCE(current->curr_ret_stack);
ret_stack = RET_STACK(current, offset);
offset += FGRAPH_FRAME_OFFSET;
@@ -589,7 +622,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
ret_stack->ret = ret;
ret_stack->func = func;
- ret_stack->calltime = calltime;
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
ret_stack->fp = frame_pointer;
#endif
@@ -723,7 +755,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
*offset += FGRAPH_FRAME_OFFSET;
*ret = ret_stack->ret;
trace->func = ret_stack->func;
- trace->calltime = ret_stack->calltime;
trace->overrun = atomic_read(&current->trace_overrun);
trace->depth = current->curr_ret_depth;
/*
@@ -868,6 +899,29 @@ ftrace_graph_get_ret_stack(struct task_struct *task, int idx)
}
/**
+ * ftrace_graph_top_ret_addr - return the top return address in the shadow stack
+ * @task: The task to read the shadow stack from.
+ *
+ * Return the first return address on the shadow stack of the @task, which is
+ * not the fgraph's return_to_handler.
+ */
+unsigned long ftrace_graph_top_ret_addr(struct task_struct *task)
+{
+ unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler);
+ struct ftrace_ret_stack *ret_stack = NULL;
+ int offset = task->curr_ret_stack;
+
+ if (offset < 0)
+ return 0;
+
+ do {
+ ret_stack = get_ret_stack(task, offset, &offset);
+ } while (ret_stack && ret_stack->ret == return_handler);
+
+ return ret_stack ? ret_stack->ret : 0;
+}
+
+/**
* ftrace_graph_ret_addr - return the original value of the return address
* @task: The task the unwinder is being executed on
* @idx: An initialized pointer to the next stack index to use
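
ftrace_graph_top_ret_addr() added above returns the first entry on a task's shadow stack that is not the fgraph trampoline, i.e. the address the task will really return to. A hedged sketch of the kind of consumer this is aimed at (the helper below is illustrative only and not part of this patch):

#include <linux/ftrace.h>
#include <asm/sections.h>

/* illustrative: when a reported return address is the fgraph trampoline,
 * substitute the real caller recorded on the shadow stack */
static unsigned long real_ret_addr(struct task_struct *task, unsigned long addr)
{
	unsigned long tramp;

	tramp = (unsigned long)dereference_kernel_function_descriptor(return_to_handler);
	if (addr == tramp)
		return ftrace_graph_top_ret_addr(task);

	return addr;
}
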
@@ -892,7 +946,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
{
struct ftrace_ret_stack *ret_stack;
unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler);
- int i = task->curr_ret_stack;
+ int i;
if (ret != return_handler)
return ret;
@@ -970,8 +1024,11 @@ static int alloc_retstack_tasklist(unsigned long **ret_stack_list)
int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
struct task_struct *g, *t;
+ if (WARN_ON_ONCE(!fgraph_stack_cachep))
+ return -ENOMEM;
+
for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
- ret_stack_list[i] = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
+ ret_stack_list[i] = kmem_cache_alloc(fgraph_stack_cachep, GFP_KERNEL);
if (!ret_stack_list[i]) {
start = 0;
end = i;
@@ -1002,7 +1059,7 @@ unlock:
rcu_read_unlock();
free:
for (i = start; i < end; i++)
- kfree(ret_stack_list[i]);
+ kmem_cache_free(fgraph_stack_cachep, ret_stack_list[i]);
return ret;
}
@@ -1012,9 +1069,7 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
struct task_struct *next,
unsigned int prev_state)
{
- struct ftrace_ret_stack *ret_stack;
unsigned long long timestamp;
- int offset;
/*
* Does the user want to count the time a function was asleep.
@@ -1031,17 +1086,7 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
if (!next->ftrace_timestamp)
return;
- /*
- * Update all the counters in next to make up for the
- * time next was sleeping.
- */
- timestamp -= next->ftrace_timestamp;
-
- for (offset = next->curr_ret_stack; offset > 0; ) {
- ret_stack = get_ret_stack(next, offset, &offset);
- if (ret_stack)
- ret_stack->calltime += timestamp;
- }
+ next->ftrace_sleeptime += timestamp - next->ftrace_timestamp;
}
static DEFINE_PER_CPU(unsigned long *, idle_ret_stack);
@@ -1077,9 +1122,12 @@ void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
if (ftrace_graph_active) {
unsigned long *ret_stack;
+ if (WARN_ON_ONCE(!fgraph_stack_cachep))
+ return;
+
ret_stack = per_cpu(idle_ret_stack, cpu);
if (!ret_stack) {
- ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
+ ret_stack = kmem_cache_alloc(fgraph_stack_cachep, GFP_KERNEL);
if (!ret_stack)
return;
per_cpu(idle_ret_stack, cpu) = ret_stack;
@@ -1099,7 +1147,10 @@ void ftrace_graph_init_task(struct task_struct *t)
if (ftrace_graph_active) {
unsigned long *ret_stack;
- ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
+ if (WARN_ON_ONCE(!fgraph_stack_cachep))
+ return;
+
+ ret_stack = kmem_cache_alloc(fgraph_stack_cachep, GFP_KERNEL);
if (!ret_stack)
return;
graph_init_task(t, ret_stack);
@@ -1114,7 +1165,11 @@ void ftrace_graph_exit_task(struct task_struct *t)
/* NULL must become visible to IRQs before we free it: */
barrier();
- kfree(ret_stack);
+ if (ret_stack) {
+ if (WARN_ON_ONCE(!fgraph_stack_cachep))
+ return;
+ kmem_cache_free(fgraph_stack_cachep, ret_stack);
+ }
}
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -1254,6 +1309,14 @@ int register_ftrace_graph(struct fgraph_ops *gops)
guard(mutex)(&ftrace_lock);
+ if (!fgraph_stack_cachep) {
+ fgraph_stack_cachep = kmem_cache_create("fgraph_stack",
+ SHADOW_STACK_SIZE,
+ SHADOW_STACK_SIZE, 0, NULL);
+ if (!fgraph_stack_cachep)
+ return -ENOMEM;
+ }
+
if (!fgraph_initialized) {
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph:online",
fgraph_cpu_init, NULL);
@@ -1318,17 +1381,17 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
{
int command = 0;
- mutex_lock(&ftrace_lock);
+ guard(mutex)(&ftrace_lock);
if (unlikely(!ftrace_graph_active))
- goto out;
+ return;
if (unlikely(gops->idx < 0 || gops->idx >= FGRAPH_ARRAY_SIZE ||
fgraph_array[gops->idx] != gops))
- goto out;
+ return;
if (fgraph_lru_release_index(gops->idx) < 0)
- goto out;
+ return;
fgraph_array[gops->idx] = &fgraph_stub;
@@ -1350,7 +1413,5 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
unregister_pm_notifier(&ftrace_suspend_notifier);
unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
}
- out:
gops->saved_func = NULL;
- mutex_unlock(&ftrace_lock);
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4c28dd177ca6..9b17efb1a87d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -820,10 +820,16 @@ void ftrace_graph_graph_time_control(bool enable)
fgraph_graph_time = enable;
}
+struct profile_fgraph_data {
+ unsigned long long calltime;
+ unsigned long long subtime;
+ unsigned long long sleeptime;
+};
+
static int profile_graph_entry(struct ftrace_graph_ent *trace,
struct fgraph_ops *gops)
{
- struct ftrace_ret_stack *ret_stack;
+ struct profile_fgraph_data *profile_data;
function_profile_call(trace->func, 0, NULL, NULL);
@@ -831,9 +837,13 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace,
if (!current->ret_stack)
return 0;
- ret_stack = ftrace_graph_get_ret_stack(current, 0);
- if (ret_stack)
- ret_stack->subtime = 0;
+ profile_data = fgraph_reserve_data(gops->idx, sizeof(*profile_data));
+ if (!profile_data)
+ return 0;
+
+ profile_data->subtime = 0;
+ profile_data->sleeptime = current->ftrace_sleeptime;
+ profile_data->calltime = trace_clock_local();
return 1;
}
@@ -841,33 +851,42 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace,
static void profile_graph_return(struct ftrace_graph_ret *trace,
struct fgraph_ops *gops)
{
- struct ftrace_ret_stack *ret_stack;
+ struct profile_fgraph_data *profile_data;
struct ftrace_profile_stat *stat;
unsigned long long calltime;
+ unsigned long long rettime = trace_clock_local();
struct ftrace_profile *rec;
unsigned long flags;
+ int size;
local_irq_save(flags);
stat = this_cpu_ptr(&ftrace_profile_stats);
if (!stat->hash || !ftrace_profile_enabled)
goto out;
+ profile_data = fgraph_retrieve_data(gops->idx, &size);
+
/* If the calltime was zero'd ignore it */
- if (!trace->calltime)
+ if (!profile_data || !profile_data->calltime)
goto out;
- calltime = trace->rettime - trace->calltime;
+ calltime = rettime - profile_data->calltime;
+
+ if (!fgraph_sleep_time) {
+ if (current->ftrace_sleeptime)
+ calltime -= current->ftrace_sleeptime - profile_data->sleeptime;
+ }
if (!fgraph_graph_time) {
+ struct profile_fgraph_data *parent_data;
/* Append this call time to the parent time to subtract */
- ret_stack = ftrace_graph_get_ret_stack(current, 1);
- if (ret_stack)
- ret_stack->subtime += calltime;
+ parent_data = fgraph_retrieve_parent_data(gops->idx, &size, 1);
+ if (parent_data)
+ parent_data->subtime += calltime;
- ret_stack = ftrace_graph_get_ret_stack(current, 0);
- if (ret_stack && ret_stack->subtime < calltime)
- calltime -= ret_stack->subtime;
+ if (profile_data->subtime && profile_data->subtime < calltime)
+ calltime -= profile_data->subtime;
else
calltime = 0;
}
@@ -883,6 +902,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace,
}
static struct fgraph_ops fprofiler_ops = {
+ .ops = {
+ .flags = FTRACE_OPS_FL_INITIALIZED,
+ INIT_OPS_HASH(fprofiler_ops.ops)
+ },
.entryfunc = &profile_graph_entry,
.retfunc = &profile_graph_return,
};
@@ -3663,7 +3686,8 @@ static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops,
}
-static u64 ftrace_update_time;
+u64 ftrace_update_time;
+u64 ftrace_total_mod_time;
unsigned long ftrace_update_tot_cnt;
unsigned long ftrace_number_of_pages;
unsigned long ftrace_number_of_groups;
@@ -3683,7 +3707,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
bool init_nop = ftrace_need_init_nop();
struct ftrace_page *pg;
struct dyn_ftrace *p;
- u64 start, stop;
+ u64 start, stop, update_time;
unsigned long update_cnt = 0;
unsigned long rec_flags = 0;
int i;
@@ -3727,7 +3751,11 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
}
stop = ftrace_now(raw_smp_processor_id());
- ftrace_update_time = stop - start;
+ update_time = stop - start;
+ if (mod)
+ ftrace_total_mod_time += update_time;
+ else
+ ftrace_update_time = update_time;
ftrace_update_tot_cnt += update_cnt;
return 0;
@@ -4806,15 +4834,13 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod)
mod_g.len = strlen(mod_g.search);
}
- mutex_lock(&ftrace_lock);
+ guard(mutex)(&ftrace_lock);
if (unlikely(ftrace_disabled))
- goto out_unlock;
+ return 0;
- if (func_g.type == MATCH_INDEX) {
- found = add_rec_by_index(hash, &func_g, clear_filter);
- goto out_unlock;
- }
+ if (func_g.type == MATCH_INDEX)
+ return add_rec_by_index(hash, &func_g, clear_filter);
do_for_each_ftrace_rec(pg, rec) {
@@ -4823,16 +4849,12 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod)
if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) {
ret = enter_record(hash, rec, clear_filter);
- if (ret < 0) {
- found = ret;
- goto out_unlock;
- }
+ if (ret < 0)
+ return ret;
found = 1;
}
cond_resched();
} while_for_each_ftrace_rec();
- out_unlock:
- mutex_unlock(&ftrace_lock);
return found;
}
@@ -4930,14 +4952,14 @@ static int cache_mod(struct trace_array *tr,
{
struct ftrace_mod_load *ftrace_mod, *n;
struct list_head *head = enable ? &tr->mod_trace : &tr->mod_notrace;
- int ret;
- mutex_lock(&ftrace_lock);
+ guard(mutex)(&ftrace_lock);
/* We do not cache inverse filters */
if (func[0] == '!') {
+ int ret = -EINVAL;
+
func++;
- ret = -EINVAL;
/* Look to remove this hash */
list_for_each_entry_safe(ftrace_mod, n, head, list) {
@@ -4953,20 +4975,15 @@ static int cache_mod(struct trace_array *tr,
continue;
}
}
- goto out;
+ return ret;
}
- ret = -EINVAL;
/* We only care about modules that have not been loaded yet */
if (module_exists(module))
- goto out;
+ return -EINVAL;
/* Save this string off, and execute it when the module is loaded */
- ret = ftrace_add_mod(tr, func, module, enable);
- out:
- mutex_unlock(&ftrace_lock);
-
- return ret;
+ return ftrace_add_mod(tr, func, module, enable);
}
static int
@@ -5076,6 +5093,9 @@ ftrace_mod_callback(struct trace_array *tr, struct ftrace_hash *hash,
char *func;
int ret;
+ if (!tr)
+ return -ENODEV;
+
/* match_records() modifies func, and we need the original */
func = kstrdup(func_orig, GFP_KERNEL);
if (!func)
@@ -5276,7 +5296,7 @@ static void release_probe(struct ftrace_func_probe *probe)
{
struct ftrace_probe_ops *probe_ops;
- mutex_lock(&ftrace_lock);
+ guard(mutex)(&ftrace_lock);
WARN_ON(probe->ref <= 0);
@@ -5294,7 +5314,6 @@ static void release_probe(struct ftrace_func_probe *probe)
list_del(&probe->list);
kfree(probe);
}
- mutex_unlock(&ftrace_lock);
}
static void acquire_probe_locked(struct ftrace_func_probe *probe)
@@ -6805,12 +6824,10 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer)
func_g.len = strlen(func_g.search);
- mutex_lock(&ftrace_lock);
+ guard(mutex)(&ftrace_lock);
- if (unlikely(ftrace_disabled)) {
- mutex_unlock(&ftrace_lock);
+ if (unlikely(ftrace_disabled))
return -ENODEV;
- }
do_for_each_ftrace_rec(pg, rec) {
@@ -6826,7 +6843,7 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer)
if (entry)
continue;
if (add_hash_entry(hash, rec->ip) == NULL)
- goto out;
+ return 0;
} else {
if (entry) {
free_hash_entry(hash, entry);
@@ -6835,13 +6852,8 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer)
}
}
} while_for_each_ftrace_rec();
-out:
- mutex_unlock(&ftrace_lock);
- if (fail)
- return -EINVAL;
-
- return 0;
+ return fail ? -EINVAL : 0;
}
static ssize_t
@@ -7920,7 +7932,7 @@ out:
void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
- kmsan_unpoison_memory(fregs, sizeof(*fregs));
+ kmsan_unpoison_memory(fregs, ftrace_regs_size());
__ftrace_ops_list_func(ip, parent_ip, NULL, fregs);
}
#else
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5807116bcd0b..7e257e855dd1 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -482,6 +482,8 @@ struct ring_buffer_per_cpu {
unsigned long nr_pages;
unsigned int current_context;
struct list_head *pages;
+ /* pages generation counter, incremented when the list changes */
+ unsigned long cnt;
struct buffer_page *head_page; /* read from head */
struct buffer_page *tail_page; /* write to tail */
struct buffer_page *commit_page; /* committed pages */
@@ -1475,40 +1477,87 @@ static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK);
}
+static bool rb_check_links(struct ring_buffer_per_cpu *cpu_buffer,
+ struct list_head *list)
+{
+ if (RB_WARN_ON(cpu_buffer,
+ rb_list_head(rb_list_head(list->next)->prev) != list))
+ return false;
+
+ if (RB_WARN_ON(cpu_buffer,
+ rb_list_head(rb_list_head(list->prev)->next) != list))
+ return false;
+
+ return true;
+}
+
/**
* rb_check_pages - integrity check of buffer pages
* @cpu_buffer: CPU buffer with pages to test
*
* As a safety measure we check to make sure the data pages have not
* been corrupted.
- *
- * Callers of this function need to guarantee that the list of pages doesn't get
- * modified during the check. In particular, if it's possible that the function
- * is invoked with concurrent readers which can swap in a new reader page then
- * the caller should take cpu_buffer->reader_lock.
*/
static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
- struct list_head *head = rb_list_head(cpu_buffer->pages);
- struct list_head *tmp;
+ struct list_head *head, *tmp;
+ unsigned long buffer_cnt;
+ unsigned long flags;
+ int nr_loops = 0;
- if (RB_WARN_ON(cpu_buffer,
- rb_list_head(rb_list_head(head->next)->prev) != head))
+ /*
+ * Walk the linked list underpinning the ring buffer and validate all
+ * its next and prev links.
+ *
+ * The check acquires the reader_lock to avoid concurrent processing
+ * with code that could be modifying the list. However, the lock cannot
+ * be held for the entire duration of the walk, as this would make the
+ * time when interrupts are disabled non-deterministic, dependent on the
+ * ring buffer size. Therefore, the code releases and re-acquires the
+ * lock after checking each page. The ring_buffer_per_cpu.cnt variable
+ * is then used to detect if the list was modified while the lock was
+ * not held, in which case the check needs to be restarted.
+ *
+ * The code attempts to perform the check at most three times before
+ * giving up. This is acceptable because this is only a self-validation
+ * to detect problems early on. In practice, the list modification
+ * operations are fairly spaced, and so this check typically succeeds at
+ * most on the second try.
+ */
+again:
+ if (++nr_loops > 3)
return;
- if (RB_WARN_ON(cpu_buffer,
- rb_list_head(rb_list_head(head->prev)->next) != head))
- return;
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ head = rb_list_head(cpu_buffer->pages);
+ if (!rb_check_links(cpu_buffer, head))
+ goto out_locked;
+ buffer_cnt = cpu_buffer->cnt;
+ tmp = head;
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
- for (tmp = rb_list_head(head->next); tmp != head; tmp = rb_list_head(tmp->next)) {
- if (RB_WARN_ON(cpu_buffer,
- rb_list_head(rb_list_head(tmp->next)->prev) != tmp))
- return;
+ while (true) {
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
- if (RB_WARN_ON(cpu_buffer,
- rb_list_head(rb_list_head(tmp->prev)->next) != tmp))
- return;
+ if (buffer_cnt != cpu_buffer->cnt) {
+ /* The list was updated, try again. */
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ goto again;
+ }
+
+ tmp = rb_list_head(tmp->next);
+ if (tmp == head)
+ /* The iteration circled back, all is done. */
+ goto out_locked;
+
+ if (!rb_check_links(cpu_buffer, tmp))
+ goto out_locked;
+
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
+
+out_locked:
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
/*
@@ -2384,9 +2433,9 @@ EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
* __ring_buffer_alloc_range - allocate a new ring_buffer from existing memory
* @size: the size in bytes per cpu that is needed.
* @flags: attributes to set for the ring buffer.
+ * @order: sub-buffer order
* @start: start of allocated range
* @range_size: size of allocated range
- * @order: sub-buffer order
* @key: ring buffer reader_lock_key.
*
* Currently the only flag that is available is the RB_FL_OVERWRITE
@@ -2532,6 +2581,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
/* make sure pages points to a valid page in the ring buffer */
cpu_buffer->pages = next_page;
+ cpu_buffer->cnt++;
/* update head page */
if (head_bit)
@@ -2638,6 +2688,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
* pointer to point to end of list
*/
head_page->prev = last_page;
+ cpu_buffer->cnt++;
success = true;
break;
}
@@ -2873,12 +2924,8 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
*/
synchronize_rcu();
for_each_buffer_cpu(buffer, cpu) {
- unsigned long flags;
-
cpu_buffer = buffer->buffers[cpu];
- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
rb_check_pages(cpu_buffer);
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
atomic_dec(&buffer->record_disabled);
}
@@ -4010,7 +4057,7 @@ static const char *show_irq_str(int bits)
return type[bits];
}
-/* Assume this is an trace event */
+/* Assume this is a trace event */
static const char *show_flags(struct ring_buffer_event *event)
{
struct trace_entry *entry;
@@ -5296,6 +5343,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
rb_inc_page(&cpu_buffer->head_page);
+ cpu_buffer->cnt++;
local_inc(&cpu_buffer->pages_read);
/* Finally update the reader page to the new head */
@@ -5835,12 +5883,9 @@ void
ring_buffer_read_finish(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
- unsigned long flags;
/* Use this opportunity to check the integrity of the ring buffer. */
- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
rb_check_pages(cpu_buffer);
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
atomic_dec(&cpu_buffer->resize_disabled);
kfree(iter->event);
@@ -6757,6 +6802,7 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
/* Install the new pages, remove the head from the list */
cpu_buffer->pages = cpu_buffer->new_pages.next;
list_del_init(&cpu_buffer->new_pages);
+ cpu_buffer->cnt++;
cpu_buffer->head_page
= list_entry(cpu_buffer->pages, struct buffer_page, list);
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 008187ebd7fe..cdc3aea12c93 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -307,14 +307,14 @@ static void ring_buffer_producer(void)
if (!disable_reader) {
if (consumer_fifo)
trace_printk("Running Consumer at SCHED_FIFO %s\n",
- consumer_fifo == 1 ? "low" : "high");
+ str_low_high(consumer_fifo == 1));
else
trace_printk("Running Consumer at nice: %d\n",
consumer_nice);
}
if (producer_fifo)
trace_printk("Running Producer at SCHED_FIFO %s\n",
- producer_fifo == 1 ? "low" : "high");
+ str_low_high(producer_fifo == 1));
else
trace_printk("Running Producer at nice: %d\n",
producer_nice);
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
index dc819aec43e8..279c70e1bd74 100644
--- a/kernel/trace/rv/rv.c
+++ b/kernel/trace/rv/rv.c
@@ -41,7 +41,7 @@
* per-task monitor, and so on), and the helper functions that glue the
* monitor to the system via trace. Generally, a monitor includes some form
* of trace output as a reaction for event parsing and exceptions,
- * as depicted bellow:
+ * as depicted below:
*
* Linux +----- RV Monitor ----------------------------------+ Formal
* Realm | | Realm
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6a891e00aa7f..3ef047ed9705 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -593,19 +593,6 @@ int tracing_check_open_get_tr(struct trace_array *tr)
return 0;
}
-int call_filter_check_discard(struct trace_event_call *call, void *rec,
- struct trace_buffer *buffer,
- struct ring_buffer_event *event)
-{
- if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
- !filter_match_preds(call->filter, rec)) {
- __trace_event_discard_commit(buffer, event);
- return 1;
- }
-
- return 0;
-}
-
/**
* trace_find_filtered_pid - check if a pid exists in a filtered_pid list
* @filtered_pids: The list of pids to check
@@ -988,7 +975,8 @@ static inline void trace_access_lock_init(void)
#endif
#ifdef CONFIG_STACKTRACE
-static void __ftrace_trace_stack(struct trace_buffer *buffer,
+static void __ftrace_trace_stack(struct trace_array *tr,
+ struct trace_buffer *buffer,
unsigned int trace_ctx,
int skip, struct pt_regs *regs);
static inline void ftrace_trace_stack(struct trace_array *tr,
@@ -997,7 +985,8 @@ static inline void ftrace_trace_stack(struct trace_array *tr,
int skip, struct pt_regs *regs);
#else
-static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
+static inline void __ftrace_trace_stack(struct trace_array *tr,
+ struct trace_buffer *buffer,
unsigned int trace_ctx,
int skip, struct pt_regs *regs)
{
@@ -1934,7 +1923,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
max_data->critical_start = data->critical_start;
max_data->critical_end = data->critical_end;
- strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
+ strscpy(max_data->comm, tsk->comm);
max_data->pid = tsk->pid;
/*
* If tsk == current, then use current_uid(), as that does not use
@@ -2908,7 +2897,6 @@ void
trace_function(struct trace_array *tr, unsigned long ip, unsigned long
parent_ip, unsigned int trace_ctx)
{
- struct trace_event_call *call = &event_function;
struct trace_buffer *buffer = tr->array_buffer.buffer;
struct ring_buffer_event *event;
struct ftrace_entry *entry;
@@ -2921,11 +2909,9 @@ trace_function(struct trace_array *tr, unsigned long ip, unsigned long
entry->ip = ip;
entry->parent_ip = parent_ip;
- if (!call_filter_check_discard(call, entry, buffer, event)) {
- if (static_branch_unlikely(&trace_function_exports_enabled))
- ftrace_exports(event, TRACE_EXPORT_FUNCTION);
- __buffer_unlock_commit(buffer, event);
- }
+ if (static_branch_unlikely(&trace_function_exports_enabled))
+ ftrace_exports(event, TRACE_EXPORT_FUNCTION);
+ __buffer_unlock_commit(buffer, event);
}
#ifdef CONFIG_STACKTRACE
@@ -2933,7 +2919,7 @@ trace_function(struct trace_array *tr, unsigned long ip, unsigned long
/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
#define FTRACE_KSTACK_NESTING 4
-#define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
+#define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING)
struct ftrace_stack {
unsigned long calls[FTRACE_KSTACK_ENTRIES];
@@ -2947,11 +2933,11 @@ struct ftrace_stacks {
static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
-static void __ftrace_trace_stack(struct trace_buffer *buffer,
+static void __ftrace_trace_stack(struct trace_array *tr,
+ struct trace_buffer *buffer,
unsigned int trace_ctx,
int skip, struct pt_regs *regs)
{
- struct trace_event_call *call = &event_kernel_stack;
struct ring_buffer_event *event;
unsigned int size, nr_entries;
struct ftrace_stack *fstack;
@@ -2994,6 +2980,20 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
nr_entries = stack_trace_save(fstack->calls, size, skip);
}
+#ifdef CONFIG_DYNAMIC_FTRACE
+ /* Mark entry of stack trace as trampoline code */
+ if (tr->ops && tr->ops->trampoline) {
+ unsigned long tramp_start = tr->ops->trampoline;
+ unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
+ unsigned long *calls = fstack->calls;
+
+ for (int i = 0; i < nr_entries; i++) {
+ if (calls[i] >= tramp_start && calls[i] < tramp_end)
+ calls[i] = FTRACE_TRAMPOLINE_MARKER;
+ }
+ }
+#endif
+
event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
struct_size(entry, caller, nr_entries),
trace_ctx);
@@ -3005,8 +3005,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
memcpy(&entry->caller, fstack->calls,
flex_array_size(entry, caller, nr_entries));
- if (!call_filter_check_discard(call, entry, buffer, event))
- __buffer_unlock_commit(buffer, event);
+ __buffer_unlock_commit(buffer, event);
out:
/* Again, don't let gcc optimize things here */
@@ -3024,7 +3023,7 @@ static inline void ftrace_trace_stack(struct trace_array *tr,
if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
return;
- __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
+ __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
}
void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
@@ -3033,7 +3032,7 @@ void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
struct trace_buffer *buffer = tr->array_buffer.buffer;
if (rcu_is_watching()) {
- __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
+ __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
return;
}
@@ -3050,7 +3049,7 @@ void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
return;
ct_irq_enter_irqson();
- __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
+ __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
ct_irq_exit_irqson();
}
@@ -3067,8 +3066,8 @@ void trace_dump_stack(int skip)
/* Skip 1 to skip this function. */
skip++;
#endif
- __ftrace_trace_stack(printk_trace->array_buffer.buffer,
- tracing_gen_ctx(), skip, NULL);
+ __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
+ tracing_gen_ctx(), skip, NULL);
}
EXPORT_SYMBOL_GPL(trace_dump_stack);
@@ -3079,7 +3078,6 @@ static void
ftrace_trace_userstack(struct trace_array *tr,
struct trace_buffer *buffer, unsigned int trace_ctx)
{
- struct trace_event_call *call = &event_user_stack;
struct ring_buffer_event *event;
struct userstack_entry *entry;
@@ -3113,8 +3111,7 @@ ftrace_trace_userstack(struct trace_array *tr,
memset(&entry->caller, 0, sizeof(entry->caller));
stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
- if (!call_filter_check_discard(call, entry, buffer, event))
- __buffer_unlock_commit(buffer, event);
+ __buffer_unlock_commit(buffer, event);
out_drop_count:
__this_cpu_dec(user_stack_count);
@@ -3283,7 +3280,6 @@ static void trace_printk_start_stop_comm(int enabled)
*/
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
- struct trace_event_call *call = &event_bprint;
struct ring_buffer_event *event;
struct trace_buffer *buffer;
struct trace_array *tr = READ_ONCE(printk_trace);
@@ -3327,10 +3323,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
entry->fmt = fmt;
memcpy(entry->buf, tbuffer, sizeof(u32) * len);
- if (!call_filter_check_discard(call, entry, buffer, event)) {
- __buffer_unlock_commit(buffer, event);
- ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
- }
+ __buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
out:
ring_buffer_nest_end(buffer);
@@ -3350,7 +3344,6 @@ static int
__trace_array_vprintk(struct trace_buffer *buffer,
unsigned long ip, const char *fmt, va_list args)
{
- struct trace_event_call *call = &event_print;
struct ring_buffer_event *event;
int len = 0, size;
struct print_entry *entry;
@@ -3385,10 +3378,8 @@ __trace_array_vprintk(struct trace_buffer *buffer,
entry->ip = ip;
memcpy(&entry->buf, tbuffer, len + 1);
- if (!call_filter_check_discard(call, entry, buffer, event)) {
- __buffer_unlock_commit(buffer, event);
- ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
- }
+ __buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
out:
ring_buffer_nest_end(buffer);
@@ -8587,15 +8578,22 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
char *buf;
int r;
- /* 256 should be plenty to hold the amount needed */
- buf = kmalloc(256, GFP_KERNEL);
+ /* 512 should be plenty to hold the amount needed */
+#define DYN_INFO_BUF_SIZE 512
+
+ buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
if (!buf)
return -ENOMEM;
- r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
+ r = scnprintf(buf, DYN_INFO_BUF_SIZE,
+ "%ld pages:%ld groups: %ld\n"
+ "ftrace boot update time = %llu (ns)\n"
+ "ftrace module total update time = %llu (ns)\n",
ftrace_update_tot_cnt,
ftrace_number_of_pages,
- ftrace_number_of_groups);
+ ftrace_number_of_groups,
+ ftrace_update_time,
+ ftrace_total_mod_time);
ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
kfree(buf);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c866991b9c78..266740b4e121 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -46,6 +46,7 @@ enum trace_type {
TRACE_BRANCH,
TRACE_GRAPH_RET,
TRACE_GRAPH_ENT,
+ TRACE_GRAPH_RETADDR_ENT,
TRACE_USER_STACK,
TRACE_BLK,
TRACE_BPUTS,
@@ -512,6 +513,8 @@ extern void __ftrace_bad_type(void);
IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
TRACE_GRAPH_ENT); \
+ IF_ASSIGN(var, ent, struct fgraph_retaddr_ent_entry,\
+ TRACE_GRAPH_RETADDR_ENT); \
IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
TRACE_GRAPH_RET); \
IF_ASSIGN(var, ent, struct func_repeats_entry, \
@@ -772,6 +775,8 @@ extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
extern unsigned long ftrace_update_tot_cnt;
extern unsigned long ftrace_number_of_pages;
extern unsigned long ftrace_number_of_groups;
+extern u64 ftrace_update_time;
+extern u64 ftrace_total_mod_time;
void ftrace_init_trace_array(struct trace_array *tr);
#else
static inline void ftrace_init_trace_array(struct trace_array *tr) { }
@@ -879,6 +884,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
#define TRACE_GRAPH_GRAPH_TIME 0x400
#define TRACE_GRAPH_PRINT_RETVAL 0x800
#define TRACE_GRAPH_PRINT_RETVAL_HEX 0x1000
+#define TRACE_GRAPH_PRINT_RETADDR 0x2000
#define TRACE_GRAPH_PRINT_FILL_SHIFT 28
#define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
@@ -900,6 +906,10 @@ extern void graph_trace_close(struct trace_iterator *iter);
extern int __trace_graph_entry(struct trace_array *tr,
struct ftrace_graph_ent *trace,
unsigned int trace_ctx);
+extern int __trace_graph_retaddr_entry(struct trace_array *tr,
+ struct ftrace_graph_ent *trace,
+ unsigned int trace_ctx,
+ unsigned long retaddr);
extern void __trace_graph_return(struct trace_array *tr,
struct ftrace_graph_ret *trace,
unsigned int trace_ctx);
@@ -1048,6 +1058,7 @@ static inline void ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftra
#endif /* CONFIG_DYNAMIC_FTRACE */
extern unsigned int fgraph_max_depth;
+extern bool fgraph_sleep_time;
static inline bool
ftrace_graph_ignore_func(struct fgraph_ops *gops, struct ftrace_graph_ent *trace)
@@ -1429,10 +1440,6 @@ struct trace_subsystem_dir {
int nr_events;
};
-extern int call_filter_check_discard(struct trace_event_call *call, void *rec,
- struct trace_buffer *buffer,
- struct ring_buffer_event *event);
-
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
struct trace_buffer *buffer,
struct ring_buffer_event *event,
@@ -2176,4 +2183,11 @@ static inline int rv_init_interface(void)
}
#endif
+/*
+ * This is used only to distinguish
+ * function address from trampoline code.
+ * So this value has no meaning.
+ */
+#define FTRACE_TRAMPOLINE_MARKER ((unsigned long) INT_MAX)
+
#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index e47fdb4c92fb..6d08a5523ce0 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -30,7 +30,6 @@ static struct trace_array *branch_tracer;
static void
probe_likely_condition(struct ftrace_likely_data *f, int val, int expect)
{
- struct trace_event_call *call = &event_branch;
struct trace_array *tr = branch_tracer;
struct trace_buffer *buffer;
struct trace_array_cpu *data;
@@ -74,16 +73,13 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect)
p--;
p++;
- strncpy(entry->func, f->data.func, TRACE_FUNC_SIZE);
- strncpy(entry->file, p, TRACE_FILE_SIZE);
- entry->func[TRACE_FUNC_SIZE] = 0;
- entry->file[TRACE_FILE_SIZE] = 0;
+ strscpy(entry->func, f->data.func);
+ strscpy(entry->file, p);
entry->constant = f->constant;
entry->line = f->data.line;
entry->correct = val == expect;
- if (!call_filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit_nostack(buffer, event);
+ trace_buffer_unlock_commit_nostack(buffer, event);
out:
current->trace_recursion &= ~TRACE_BRANCH_BIT;
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 4702efb00ff2..4cb2ebc439be 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -154,5 +154,5 @@ static atomic64_t trace_counter;
*/
u64 notrace trace_clock_counter(void)
{
- return atomic64_add_return(1, &trace_counter);
+ return atomic64_inc_return(&trace_counter);
}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index c47422b20908..82fd174ebbe0 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -85,9 +85,35 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
);
-/* Function return entry */
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+
+/* Function call entry with a return address */
+FTRACE_ENTRY_PACKED(fgraph_retaddr_entry, fgraph_retaddr_ent_entry,
+
+ TRACE_GRAPH_RETADDR_ENT,
+
+ F_STRUCT(
+ __field_struct( struct fgraph_retaddr_ent, graph_ent )
+ __field_packed( unsigned long, graph_ent, func )
+ __field_packed( int, graph_ent, depth )
+ __field_packed( unsigned long, graph_ent, retaddr )
+ ),
+
+ F_printk("--> %ps (%d) <- %ps", (void *)__entry->func, __entry->depth,
+ (void *)__entry->retaddr)
+);
+
+#else
+
+#ifndef fgraph_retaddr_ent_entry
+#define fgraph_retaddr_ent_entry ftrace_graph_ent_entry
+#endif
+
+#endif
+
#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
+/* Function return entry */
FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
TRACE_GRAPH_RET,
@@ -110,6 +136,7 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
#else
+/* Function return entry */
FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
TRACE_GRAPH_RET,
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 05e791241812..3ff9caa4a71b 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -352,10 +352,16 @@ void perf_uprobe_destroy(struct perf_event *p_event)
int perf_trace_add(struct perf_event *p_event, int flags)
{
struct trace_event_call *tp_event = p_event->tp_event;
+ struct hw_perf_event *hwc = &p_event->hw;
if (!(flags & PERF_EF_START))
p_event->hw.state = PERF_HES_STOPPED;
+ if (is_sampling_event(p_event)) {
+ hwc->last_period = hwc->sample_period;
+ perf_swevent_set_period(p_event);
+ }
+
/*
* If TRACE_REG_PERF_ADD returns false; no custom action was performed
* and we need to take the default action of enqueueing our event on
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7266ec2a4eea..77e68efbd43e 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -3149,8 +3149,6 @@ static void __trace_remove_event_call(struct trace_event_call *call)
{
event_remove(call);
trace_destroy_fields(call);
- free_event_filter(call->filter);
- call->filter = NULL;
}
static int probe_remove_event_call(struct trace_event_call *call)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 0c611b281a5b..78051de581e7 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1616,7 +1616,7 @@ static int parse_pred(const char *str, void *data,
goto err_free;
}
- strncpy(num_buf, str + s, len);
+ memcpy(num_buf, str + s, len);
num_buf[len] = 0;
ret = kstrtoul(num_buf, 0, &ip);
@@ -1694,7 +1694,7 @@ static int parse_pred(const char *str, void *data,
if (!pred->regex)
goto err_mem;
pred->regex->len = len;
- strncpy(pred->regex->pattern, str + s, len);
+ memcpy(pred->regex->pattern, str + s, len);
pred->regex->pattern[len] = 0;
} else if (!strncmp(str + i, "CPUS", 4)) {
@@ -1859,7 +1859,7 @@ static int parse_pred(const char *str, void *data,
if (!pred->regex)
goto err_mem;
pred->regex->len = len;
- strncpy(pred->regex->pattern, str + s, len);
+ memcpy(pred->regex->pattern, str + s, len);
pred->regex->pattern[len] = 0;
filter_build_regex(pred);
@@ -1919,7 +1919,7 @@ static int parse_pred(const char *str, void *data,
goto err_free;
}
- strncpy(num_buf, str + s, len);
+ memcpy(num_buf, str + s, len);
num_buf[len] = 0;
/* Make sure it is a value */
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 5f9119eb7c67..9c058aa8baf3 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -822,7 +822,7 @@ static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals,
{
struct tracepoint *tp = event->tp;
- if (unlikely(atomic_read(&tp->key.enabled) > 0)) {
+ if (unlikely(static_key_enabled(&tp->key))) {
struct tracepoint_func *probe_func_ptr;
synth_probe_func_t probe_func;
void *__data;
@@ -1354,10 +1354,7 @@ static const char *hist_field_name(struct hist_field *field,
} else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
field_name = "common_timestamp";
else if (field->flags & HIST_FIELD_FL_STACKTRACE) {
- if (field->field)
- field_name = field->field->name;
- else
- field_name = "common_stacktrace";
+ field_name = "common_stacktrace";
} else if (field->flags & HIST_FIELD_FL_HITCOUNT)
field_name = "hitcount";
@@ -1599,7 +1596,7 @@ static inline void save_comm(char *comm, struct task_struct *task)
return;
}
- strncpy(comm, task->comm, TASK_COMM_LEN);
+ strscpy(comm, task->comm, TASK_COMM_LEN);
}
static void hist_elt_data_free(struct hist_elt_data *elt_data)
@@ -3405,7 +3402,7 @@ static bool cond_snapshot_update(struct trace_array *tr, void *cond_data)
elt_data = context->elt->private_data;
track_elt_data = track_data->elt.private_data;
if (elt_data->comm)
- strncpy(track_elt_data->comm, elt_data->comm, TASK_COMM_LEN);
+ strscpy(track_elt_data->comm, elt_data->comm, TASK_COMM_LEN);
track_data->updated = true;
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 42b0d998d103..17bcad8f79de 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -1676,7 +1676,7 @@ static void update_enable_bit_for(struct user_event *user)
struct tracepoint *tp = &user->tracepoint;
char status = 0;
- if (atomic_read(&tp->key.enabled) > 0) {
+ if (static_key_enabled(&tp->key)) {
struct tracepoint_func *probe_func_ptr;
user_event_func_t probe_func;
@@ -2280,7 +2280,7 @@ static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
* It's possible key.enabled disables after this check, however
* we don't mind if a few events are included in this condition.
*/
- if (likely(atomic_read(&tp->key.enabled) > 0)) {
+ if (likely(static_key_enabled(&tp->key))) {
struct tracepoint_func *probe_func_ptr;
user_event_func_t probe_func;
struct iov_iter copy;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 3b0cea37e029..74c353164ca1 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -176,6 +176,27 @@ static void function_trace_start(struct trace_array *tr)
tracing_reset_online_cpus(&tr->array_buffer);
}
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static __always_inline unsigned long
+function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
+{
+ unsigned long true_parent_ip;
+ int idx = 0;
+
+ true_parent_ip = parent_ip;
+ if (unlikely(parent_ip == (unsigned long)&return_to_handler) && fregs)
+ true_parent_ip = ftrace_graph_ret_addr(current, &idx, parent_ip,
+ (unsigned long *)ftrace_regs_get_stack_pointer(fregs));
+ return true_parent_ip;
+}
+#else
+static __always_inline unsigned long
+function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
+{
+ return parent_ip;
+}
+#endif
+
static void
function_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
@@ -184,7 +205,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
struct trace_array_cpu *data;
unsigned int trace_ctx;
int bit;
- int cpu;
if (unlikely(!tr->function_enabled))
return;
@@ -193,10 +213,11 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
if (bit < 0)
return;
+ parent_ip = function_get_true_parent_ip(parent_ip, fregs);
+
trace_ctx = tracing_gen_ctx();
- cpu = smp_processor_id();
- data = per_cpu_ptr(tr->array_buffer.data, cpu);
+ data = this_cpu_ptr(tr->array_buffer.data);
if (!atomic_read(&data->disabled))
trace_function(tr, ip, parent_ip, trace_ctx);
@@ -241,6 +262,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
* recursive protection is performed.
*/
local_irq_save(flags);
+ parent_ip = function_get_true_parent_ip(parent_ip, fregs);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->array_buffer.data, cpu);
disabled = atomic_inc_return(&data->disabled);
@@ -300,7 +322,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
unsigned int trace_ctx;
unsigned long flags;
int bit;
- int cpu;
if (unlikely(!tr->function_enabled))
return;
@@ -309,8 +330,8 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
if (bit < 0)
return;
- cpu = smp_processor_id();
- data = per_cpu_ptr(tr->array_buffer.data, cpu);
+ parent_ip = function_get_true_parent_ip(parent_ip, fregs);
+ data = this_cpu_ptr(tr->array_buffer.data);
if (atomic_read(&data->disabled))
goto out;
@@ -321,7 +342,7 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
* TODO: think about a solution that is better than just hoping to be
* lucky.
*/
- last_info = per_cpu_ptr(tr->last_func_repeats, cpu);
+ last_info = this_cpu_ptr(tr->last_func_repeats);
if (is_repeat_check(tr, last_info, ip, parent_ip))
goto out;
@@ -356,6 +377,7 @@ function_stack_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
* recursive protection is performed.
*/
local_irq_save(flags);
+ parent_ip = function_get_true_parent_ip(parent_ip, fregs);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->array_buffer.data, cpu);
disabled = atomic_inc_return(&data->disabled);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a569daaac4c4..5504b5e4e7b4 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -31,7 +31,10 @@ struct fgraph_data {
struct fgraph_cpu_data __percpu *cpu_data;
/* Place to preserve last processed entry. */
- struct ftrace_graph_ent_entry ent;
+ union {
+ struct ftrace_graph_ent_entry ent;
+ struct fgraph_retaddr_ent_entry rent;
+ } ent;
struct ftrace_graph_ret_entry ret;
int failed;
int cpu;
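[note] Turning the saved entry into a union lets the iterator preserve either a plain graph entry or the larger retaddr entry when output fails and has to be replayed; which member is valid is decided later in this diff from the saved trace_entry type. A runnable user-space sketch of the same "save whichever variant, replay by type tag" idea, with invented struct names:

#include <stdio.h>

enum { ENT_PLAIN = 1, ENT_RETADDR = 2 };

struct plain_ent   { int type; unsigned long func; };
struct retaddr_ent { int type; unsigned long func; unsigned long retaddr; };

union saved_ent {
        struct plain_ent   ent;
        struct retaddr_ent rent;                /* strictly larger variant */
};

static void save(union saved_ent *slot, const void *e, int type)
{
        if (type == ENT_RETADDR)
                slot->rent = *(const struct retaddr_ent *)e;
        else
                slot->ent = *(const struct plain_ent *)e;
}

int main(void)
{
        union saved_ent slot;
        struct retaddr_ent r = { ENT_RETADDR, 0x1000, 0x2000 };

        save(&slot, &r, r.type);
        if (slot.ent.type == ENT_RETADDR)       /* type tag is shared by both views */
                printf("retaddr=%#lx\n", slot.rent.retaddr);
        return 0;
}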
@@ -64,6 +67,10 @@ static struct tracer_opt trace_opts[] = {
/* Display function return value in hexadecimal format ? */
{ TRACER_OPT(funcgraph-retval-hex, TRACE_GRAPH_PRINT_RETVAL_HEX) },
#endif
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+ /* Display function return address ? */
+ { TRACER_OPT(funcgraph-retaddr, TRACE_GRAPH_PRINT_RETADDR) },
+#endif
/* Include sleep time (scheduled out) between entry and return */
{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
@@ -83,6 +90,11 @@ static struct tracer_flags tracer_flags = {
.opts = trace_opts
};
+static bool tracer_flags_is_set(u32 flags)
+{
+ return (tracer_flags.val & flags) == flags;
+}
+
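[note] tracer_flags_is_set() deliberately compares with == rather than testing a nonzero intersection, so a multi-bit mask only counts as set when every requested bit is present. A tiny runnable illustration (flag values made up):

#include <assert.h>

#define FLAG_A 0x1
#define FLAG_B 0x2

static int all_set(unsigned int val, unsigned int flags)
{
        return (val & flags) == flags;          /* every bit present */
}

int main(void)
{
        unsigned int val = FLAG_A;              /* only A enabled */

        assert(all_set(val, FLAG_A));           /* true */
        assert(!all_set(val, FLAG_A | FLAG_B)); /* false: B missing */
        assert(val & (FLAG_A | FLAG_B));        /* an "any bit" test would pass */
        return 0;
}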
/*
* DURATION column is being also used to display IRQ signs,
* following values are used by print_graph_irq and others
@@ -102,7 +114,6 @@ int __trace_graph_entry(struct trace_array *tr,
struct ftrace_graph_ent *trace,
unsigned int trace_ctx)
{
- struct trace_event_call *call = &event_funcgraph_entry;
struct ring_buffer_event *event;
struct trace_buffer *buffer = tr->array_buffer.buffer;
struct ftrace_graph_ent_entry *entry;
@@ -113,11 +124,42 @@ int __trace_graph_entry(struct trace_array *tr,
return 0;
entry = ring_buffer_event_data(event);
entry->graph_ent = *trace;
- if (!call_filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit_nostack(buffer, event);
+ trace_buffer_unlock_commit_nostack(buffer, event);
+
+ return 1;
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+int __trace_graph_retaddr_entry(struct trace_array *tr,
+ struct ftrace_graph_ent *trace,
+ unsigned int trace_ctx,
+ unsigned long retaddr)
+{
+ struct ring_buffer_event *event;
+ struct trace_buffer *buffer = tr->array_buffer.buffer;
+ struct fgraph_retaddr_ent_entry *entry;
+
+ event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RETADDR_ENT,
+ sizeof(*entry), trace_ctx);
+ if (!event)
+ return 0;
+ entry = ring_buffer_event_data(event);
+ entry->graph_ent.func = trace->func;
+ entry->graph_ent.depth = trace->depth;
+ entry->graph_ent.retaddr = retaddr;
+ trace_buffer_unlock_commit_nostack(buffer, event);
return 1;
}
+#else
+int __trace_graph_retaddr_entry(struct trace_array *tr,
+ struct ftrace_graph_ent *trace,
+ unsigned int trace_ctx,
+ unsigned long retaddr)
+{
+ return 1;
+}
+#endif
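[note] Both __trace_graph_entry() and the new __trace_graph_retaddr_entry() follow the same reserve -> fill -> commit sequence against the ring buffer; with call_filter_check_discard() gone, the event is committed unconditionally once reserved. A hedged kernel-style sketch of that sequence for a hypothetical event (MY_EVENT_TYPE and struct my_entry are invented; the helper calls are the ones used in the hunks above):

/* Sketch only: not a real trace event. */
static int my_trace_event(struct trace_array *tr, unsigned long val,
                          unsigned int trace_ctx)
{
        struct trace_buffer *buffer = tr->array_buffer.buffer;
        struct ring_buffer_event *event;
        struct my_entry *entry;

        event = trace_buffer_lock_reserve(buffer, MY_EVENT_TYPE,
                                          sizeof(*entry), trace_ctx);
        if (!event)
                return 0;               /* buffer full or tracing disabled */

        entry = ring_buffer_event_data(event);
        entry->val = val;               /* fill the payload */

        /* commit without recording a stack trace for this event */
        trace_buffer_unlock_commit_nostack(buffer, event);
        return 1;
}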
static inline int ftrace_graph_ignore_irqs(void)
{
@@ -127,12 +169,18 @@ static inline int ftrace_graph_ignore_irqs(void)
return in_hardirq();
}
+struct fgraph_times {
+ unsigned long long calltime;
+ unsigned long long sleeptime; /* may be optional! */
+};
+
int trace_graph_entry(struct ftrace_graph_ent *trace,
struct fgraph_ops *gops)
{
unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
+ struct fgraph_times *ftimes;
unsigned long flags;
unsigned int trace_ctx;
long disabled;
@@ -167,6 +215,19 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
if (ftrace_graph_ignore_irqs())
return 0;
+ if (fgraph_sleep_time) {
+ /* Only need to record the calltime */
+ ftimes = fgraph_reserve_data(gops->idx, sizeof(ftimes->calltime));
+ } else {
+ ftimes = fgraph_reserve_data(gops->idx, sizeof(*ftimes));
+ if (ftimes)
+ ftimes->sleeptime = current->ftrace_sleeptime;
+ }
+ if (!ftimes)
+ return 0;
+
+ ftimes->calltime = trace_clock_local();
+
/*
* Stop here if tracing_threshold is set. We only write function return
* events to the ring buffer.
@@ -180,7 +241,13 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1)) {
trace_ctx = tracing_gen_ctx_flags(flags);
- ret = __trace_graph_entry(tr, trace, trace_ctx);
+ if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
+ tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR))) {
+ unsigned long retaddr = ftrace_graph_top_ret_addr(current);
+
+ ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr);
+ } else
+ ret = __trace_graph_entry(tr, trace, trace_ctx);
} else {
ret = 0;
}
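[note] The entry handler now stashes its timestamp (and, when sleep time is to be subtracted later, the task's accumulated sleep time) in per-call shadow-stack storage via fgraph_reserve_data(), and the return handler fetches it back with fgraph_retrieve_data(). A hedged sketch of that pairing for a hypothetical fgraph_ops user, reduced to a plain call-duration measurement:

/* Sketch only: per-call data on the fgraph shadow stack. */
struct my_times {
        unsigned long long calltime;
};

static int my_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops)
{
        struct my_times *t;

        t = fgraph_reserve_data(gops->idx, sizeof(*t));
        if (!t)
                return 0;               /* no room: skip this function */

        t->calltime = trace_clock_local();
        return 1;
}

static void my_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops)
{
        struct my_times *t;
        int size;

        t = fgraph_retrieve_data(gops->idx, &size);
        if (!t)
                return;                 /* entry was never recorded */

        pr_debug("duration: %llu ns\n", trace->rettime - t->calltime);
}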
@@ -223,7 +290,6 @@ void __trace_graph_return(struct trace_array *tr,
struct ftrace_graph_ret *trace,
unsigned int trace_ctx)
{
- struct trace_event_call *call = &event_funcgraph_exit;
struct ring_buffer_event *event;
struct trace_buffer *buffer = tr->array_buffer.buffer;
struct ftrace_graph_ret_entry *entry;
@@ -234,8 +300,17 @@ void __trace_graph_return(struct trace_array *tr,
return;
entry = ring_buffer_event_data(event);
entry->ret = *trace;
- if (!call_filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit_nostack(buffer, event);
+ trace_buffer_unlock_commit_nostack(buffer, event);
+}
+
+static void handle_nosleeptime(struct ftrace_graph_ret *trace,
+ struct fgraph_times *ftimes,
+ int size)
+{
+ if (fgraph_sleep_time || size < sizeof(*ftimes))
+ return;
+
+ ftimes->calltime += current->ftrace_sleeptime - ftimes->sleeptime;
}
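[note] handle_nosleeptime() is the "no sleep time" accounting: when sleep is not supposed to be charged to the function, the sleep accumulated while it was on the stack (current total minus the total snapshotted at entry) is added to calltime, shrinking the reported duration (rettime - calltime) by exactly that amount. A runnable arithmetic check of that identity, with made-up numbers:

#include <assert.h>

int main(void)
{
        /* all values in arbitrary time units */
        unsigned long long calltime        = 100;
        unsigned long long entry_sleeptime = 40;    /* task total at entry  */
        unsigned long long exit_sleeptime  = 70;    /* task total at return */
        unsigned long long rettime         = 200;

        /* handle_nosleeptime(): push calltime forward by the sleep that
         * happened while this function was executing (70 - 40 = 30). */
        calltime += exit_sleeptime - entry_sleeptime;

        /* the reported duration excludes the 30 units spent sleeping */
        assert(rettime - calltime == 200 - 100 - 30);
        return 0;
}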
void trace_graph_return(struct ftrace_graph_ret *trace,
@@ -244,9 +319,11 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
+ struct fgraph_times *ftimes;
unsigned long flags;
unsigned int trace_ctx;
long disabled;
+ int size;
int cpu;
ftrace_graph_addr_finish(gops, trace);
@@ -256,6 +333,14 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
return;
}
+ ftimes = fgraph_retrieve_data(gops->idx, &size);
+ if (!ftimes)
+ return;
+
+ handle_nosleeptime(trace, ftimes, size);
+
+ trace->calltime = ftimes->calltime;
+
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->array_buffer.data, cpu);
@@ -271,6 +356,9 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
struct fgraph_ops *gops)
{
+ struct fgraph_times *ftimes;
+ int size;
+
ftrace_graph_addr_finish(gops, trace);
if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
@@ -278,8 +366,16 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
return;
}
+ ftimes = fgraph_retrieve_data(gops->idx, &size);
+ if (!ftimes)
+ return;
+
+ handle_nosleeptime(trace, ftimes, size);
+
+ trace->calltime = ftimes->calltime;
+
if (tracing_thresh &&
- (trace->rettime - trace->calltime < tracing_thresh))
+ (trace->rettime - ftimes->calltime < tracing_thresh))
return;
else
trace_graph_return(trace, gops);
@@ -457,7 +553,7 @@ get_return_for_leaf(struct trace_iterator *iter,
* then we just reuse the data from before.
*/
if (data && data->failed) {
- curr = &data->ent;
+ curr = &data->ent.ent;
next = &data->ret;
} else {
@@ -487,7 +583,10 @@ get_return_for_leaf(struct trace_iterator *iter,
* Save current and next entries for later reference
* if the output fails.
*/
- data->ent = *curr;
+ if (unlikely(curr->ent.type == TRACE_GRAPH_RETADDR_ENT))
+ data->ent.rent = *(struct fgraph_retaddr_ent_entry *)curr;
+ else
+ data->ent.ent = *curr;
/*
* If the next event is not a return type, then
* we only care about what type it is. Otherwise we can
@@ -651,52 +750,96 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
}
#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
-
#define __TRACE_GRAPH_PRINT_RETVAL TRACE_GRAPH_PRINT_RETVAL
+#else
+#define __TRACE_GRAPH_PRINT_RETVAL 0
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+#define __TRACE_GRAPH_PRINT_RETADDR TRACE_GRAPH_PRINT_RETADDR
+static void print_graph_retaddr(struct trace_seq *s, struct fgraph_retaddr_ent_entry *entry,
+ u32 trace_flags, bool comment)
+{
+ if (comment)
+ trace_seq_puts(s, " /*");
+
+ trace_seq_puts(s, " <-");
+ seq_print_ip_sym(s, entry->graph_ent.retaddr, trace_flags | TRACE_ITER_SYM_OFFSET);
+
+ if (comment)
+ trace_seq_puts(s, " */");
+}
+#else
+#define __TRACE_GRAPH_PRINT_RETADDR 0
+#define print_graph_retaddr(_seq, _entry, _tflags, _comment) do { } while (0)
+#endif
-static void print_graph_retval(struct trace_seq *s, unsigned long retval,
- bool leaf, void *func, bool hex_format)
+#if defined(CONFIG_FUNCTION_GRAPH_RETVAL) || defined(CONFIG_FUNCTION_GRAPH_RETADDR)
+
+static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entry *entry,
+ struct ftrace_graph_ret *graph_ret, void *func,
+ u32 opt_flags, u32 trace_flags)
{
unsigned long err_code = 0;
+ unsigned long retval = 0;
+ bool print_retaddr = false;
+ bool print_retval = false;
+ bool hex_format = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL_HEX);
- if (retval == 0 || hex_format)
- goto done;
+#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
+ retval = graph_ret->retval;
+ print_retval = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL);
+#endif
- /* Check if the return value matches the negative format */
- if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
- (((u64)retval) >> 32) == 0) {
- /* sign extension */
- err_code = (unsigned long)(s32)retval;
- } else {
- err_code = retval;
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+ print_retaddr = !!(opt_flags & TRACE_GRAPH_PRINT_RETADDR);
+#endif
+
+ if (print_retval && retval && !hex_format) {
+ /* Check if the return value matches the negative format */
+ if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) &&
+ (((u64)retval) >> 32) == 0) {
+ err_code = sign_extend64(retval, 31);
+ } else {
+ err_code = retval;
+ }
+
+ if (!IS_ERR_VALUE(err_code))
+ err_code = 0;
}
- if (!IS_ERR_VALUE(err_code))
- err_code = 0;
+ if (entry) {
+ if (entry->ent.type != TRACE_GRAPH_RETADDR_ENT)
+ print_retaddr = false;
-done:
- if (leaf) {
- if (hex_format || (err_code == 0))
- trace_seq_printf(s, "%ps(); /* = 0x%lx */\n",
- func, retval);
+ trace_seq_printf(s, "%ps();", func);
+ if (print_retval || print_retaddr)
+ trace_seq_puts(s, " /*");
else
- trace_seq_printf(s, "%ps(); /* = %ld */\n",
- func, err_code);
+ trace_seq_putc(s, '\n');
} else {
+ print_retaddr = false;
+ trace_seq_printf(s, "} /* %ps", func);
+ }
+
+ if (print_retaddr)
+ print_graph_retaddr(s, (struct fgraph_retaddr_ent_entry *)entry,
+ trace_flags, false);
+
+ if (print_retval) {
if (hex_format || (err_code == 0))
- trace_seq_printf(s, "} /* %ps = 0x%lx */\n",
- func, retval);
+ trace_seq_printf(s, " ret=0x%lx", retval);
else
- trace_seq_printf(s, "} /* %ps = %ld */\n",
- func, err_code);
+ trace_seq_printf(s, " ret=%ld", err_code);
}
+
+ if (!entry || print_retval || print_retaddr)
+ trace_seq_puts(s, " */\n");
}
#else
-#define __TRACE_GRAPH_PRINT_RETVAL 0
-
-#define print_graph_retval(_seq, _retval, _leaf, _func, _format) do {} while (0)
+#define print_graph_retval(_seq, _ent, _ret, _func, _opt_flags, _trace_flags) do {} while (0)
#endif
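[note] The retval path above folds the old open-coded sign extension into sign_extend64(): on a 64-bit kernel, a function returning a negative int leaves only the low 32 bits set (bit 31 on, upper half zero), so the raw register value looks like a huge positive number until it is sign-extended; IS_ERR_VALUE() then decides whether it sits in the errno range worth printing in decimal. A runnable user-space check of that logic; sign_extend64_sketch() and the MAX_ERRNO bound are local stand-ins for the kernel helpers:

#include <assert.h>
#include <stdint.h>

#define MAX_ERRNO 4095          /* kernel convention for error pointers/values */

static uint64_t sign_extend64_sketch(uint64_t value, int index)
{
        int shift = 63 - index;

        return (uint64_t)((int64_t)(value << shift) >> shift);
}

static int is_err_value(uint64_t v)
{
        return v >= (uint64_t)-MAX_ERRNO;
}

int main(void)
{
        /* a function returning -2 (int), seen as a zero-extended register */
        uint64_t raw = 0x00000000fffffffeULL;

        assert((raw & (1ULL << 31)) && (raw >> 32) == 0);

        uint64_t fixed = sign_extend64_sketch(raw, 31);

        assert((int64_t)fixed == -2);
        assert(is_err_value(fixed));    /* would be printed as " ret=-2" */
        return 0;
}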
@@ -748,14 +891,15 @@ print_graph_entry_leaf(struct trace_iterator *iter,
trace_seq_putc(s, ' ');
/*
- * Write out the function return value if the option function-retval is
- * enabled.
+ * Write out the function return value or return address
*/
- if (flags & __TRACE_GRAPH_PRINT_RETVAL)
- print_graph_retval(s, graph_ret->retval, true, (void *)func,
- !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
- else
+ if (flags & (__TRACE_GRAPH_PRINT_RETVAL | __TRACE_GRAPH_PRINT_RETADDR)) {
+ print_graph_retval(s, entry, graph_ret,
+ (void *)graph_ret->func + iter->tr->text_delta,
+ flags, tr->trace_flags);
+ } else {
trace_seq_printf(s, "%ps();\n", (void *)func);
+ }
print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET,
cpu, iter->ent->pid, flags);
@@ -796,7 +940,12 @@ print_graph_entry_nested(struct trace_iterator *iter,
func = call->func + iter->tr->text_delta;
- trace_seq_printf(s, "%ps() {\n", (void *)func);
+ trace_seq_printf(s, "%ps() {", (void *)func);
+ if (flags & __TRACE_GRAPH_PRINT_RETADDR &&
+ entry->ent.type == TRACE_GRAPH_RETADDR_ENT)
+ print_graph_retaddr(s, (struct fgraph_retaddr_ent_entry *)entry,
+ tr->trace_flags, true);
+ trace_seq_putc(s, '\n');
if (trace_seq_has_overflowed(s))
return TRACE_TYPE_PARTIAL_LINE;
@@ -1043,11 +1192,10 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
/*
* Always write out the function name and its return value if the
- * function-retval option is enabled.
+ * funcgraph-retval option is enabled.
*/
if (flags & __TRACE_GRAPH_PRINT_RETVAL) {
- print_graph_retval(s, trace->retval, false, (void *)func,
- !!(flags & TRACE_GRAPH_PRINT_RETVAL_HEX));
+ print_graph_retval(s, NULL, trace, (void *)func, flags, tr->trace_flags);
} else {
/*
* If the return function does not have a matching entry,
@@ -1162,7 +1310,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
* to print out the missing entry which would never go out.
*/
if (data && data->failed) {
- field = &data->ent;
+ field = &data->ent.ent;
iter->cpu = data->cpu;
ret = print_graph_entry(field, s, iter, flags);
if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
@@ -1186,6 +1334,16 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
saved = *field;
return print_graph_entry(&saved, s, iter, flags);
}
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+ case TRACE_GRAPH_RETADDR_ENT: {
+ struct fgraph_retaddr_ent_entry saved;
+ struct fgraph_retaddr_ent_entry *rfield;
+
+ trace_assign_type(rfield, entry);
+ saved = *rfield;
+ return print_graph_entry((struct ftrace_graph_ent_entry *)&saved, s, iter, flags);
+ }
+#endif
case TRACE_GRAPH_RET: {
struct ftrace_graph_ret_entry *field;
trace_assign_type(field, entry);
@@ -1380,6 +1538,13 @@ static struct trace_event graph_trace_entry_event = {
.funcs = &graph_functions,
};
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+static struct trace_event graph_trace_retaddr_entry_event = {
+ .type = TRACE_GRAPH_RETADDR_ENT,
+ .funcs = &graph_functions,
+};
+#endif
+
static struct trace_event graph_trace_ret_event = {
.type = TRACE_GRAPH_RET,
.funcs = &graph_functions
@@ -1466,6 +1631,13 @@ static __init int init_graph_trace(void)
return 1;
}
+#ifdef CONFIG_FUNCTION_GRAPH_RETADDR
+ if (!register_trace_event(&graph_trace_retaddr_entry_event)) {
+ pr_warn("Warning: could not register graph trace retaddr events\n");
+ return 1;
+ }
+#endif
+
if (!register_trace_event(&graph_trace_ret_event)) {
pr_warn("Warning: could not register graph trace events\n");
return 1;
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 3bd6071441ad..b65353ec2837 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -130,7 +130,6 @@ static bool hwlat_busy;
static void trace_hwlat_sample(struct hwlat_sample *sample)
{
struct trace_array *tr = hwlat_trace;
- struct trace_event_call *call = &event_hwlat;
struct trace_buffer *buffer = tr->array_buffer.buffer;
struct ring_buffer_event *event;
struct hwlat_entry *entry;
@@ -148,8 +147,7 @@ static void trace_hwlat_sample(struct hwlat_sample *sample)
entry->nmi_count = sample->nmi_count;
entry->count = sample->count;
- if (!call_filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit_nostack(buffer, event);
+ trace_buffer_unlock_commit_nostack(buffer, event);
}
/* Macros to encapsulate the time capturing infrastructure */
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index 59857a1ee44c..1e72d20b3c2f 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -96,22 +96,19 @@ static int kdb_ftdump(int argc, const char **argv)
{
int skip_entries = 0;
long cpu_file;
- char *cp;
+ int err;
int cnt;
int cpu;
if (argc > 2)
return KDB_ARGCOUNT;
- if (argc) {
- skip_entries = simple_strtol(argv[1], &cp, 0);
- if (*cp)
- skip_entries = 0;
- }
+ if (argc && kstrtoint(argv[1], 0, &skip_entries))
+ return KDB_BADINT;
if (argc == 2) {
- cpu_file = simple_strtol(argv[2], &cp, 0);
- if (*cp || cpu_file >= NR_CPUS || cpu_file < 0 ||
+ err = kstrtol(argv[2], 0, &cpu_file);
+ if (err || cpu_file >= NR_CPUS || cpu_file < 0 ||
!cpu_online(cpu_file))
return KDB_BADINT;
} else {
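[note] The kdb_ftdump() change swaps simple_strtol() (which silently ignores trailing junk, previously papered over by checking *cp) for kstrtoint()/kstrtol(), which reject anything that is not a complete integer and return an error instead. A runnable user-space sketch of an equally strict parser built on strtol(); parse_int_strict() is a hypothetical helper, not the kernel function:

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>

/* Return 0 and store the value on success, -EINVAL/-ERANGE otherwise. */
static int parse_int_strict(const char *s, int base, int *out)
{
        char *end;
        long val;

        errno = 0;
        val = strtol(s, &end, base);
        if (end == s || *end != '\0')
                return -EINVAL;         /* empty string or trailing garbage */
        if (errno == ERANGE || val < INT_MIN || val > INT_MAX)
                return -ERANGE;
        *out = (int)val;
        return 0;
}

int main(void)
{
        int v;

        assert(parse_int_strict("42", 0, &v) == 0 && v == 42);
        assert(parse_int_strict("42abc", 0, &v) == -EINVAL);
        assert(parse_int_strict("", 0, &v) == -EINVAL);
        return 0;
}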
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 64e77b513697..ba5858866b2f 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -294,7 +294,6 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
struct trace_array_cpu *data,
struct mmiotrace_rw *rw)
{
- struct trace_event_call *call = &event_mmiotrace_rw;
struct trace_buffer *buffer = tr->array_buffer.buffer;
struct ring_buffer_event *event;
struct trace_mmiotrace_rw *entry;
@@ -310,8 +309,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
entry = ring_buffer_event_data(event);
entry->rw = *rw;
- if (!call_filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit(tr, buffer, event, trace_ctx);
+ trace_buffer_unlock_commit(tr, buffer, event, trace_ctx);
}
void mmio_trace_rw(struct mmiotrace_rw *rw)
@@ -325,7 +323,6 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
struct trace_array_cpu *data,
struct mmiotrace_map *map)
{
- struct trace_event_call *call = &event_mmiotrace_map;
struct trace_buffer *buffer = tr->array_buffer.buffer;
struct ring_buffer_event *event;
struct trace_mmiotrace_map *entry;
@@ -341,8 +338,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
entry = ring_buffer_event_data(event);
entry->map = *map;
- if (!call_filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit(tr, buffer, event, trace_ctx);
+ trace_buffer_unlock_commit(tr, buffer, event, trace_ctx);
}
void mmio_trace_mapping(struct mmiotrace_map *map)
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index a50ed23bee77..b9f96c77527d 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -499,7 +499,6 @@ static void print_osnoise_headers(struct seq_file *s)
static void
__trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
{
- struct trace_event_call *call = &event_osnoise;
struct ring_buffer_event *event;
struct osnoise_entry *entry;
@@ -517,8 +516,7 @@ __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffe
entry->softirq_count = sample->softirq_count;
entry->thread_count = sample->thread_count;
- if (!call_filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit_nostack(buffer, event);
+ trace_buffer_unlock_commit_nostack(buffer, event);
}
/*
@@ -578,7 +576,6 @@ static void print_timerlat_headers(struct seq_file *s)
static void
__trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
{
- struct trace_event_call *call = &event_osnoise;
struct ring_buffer_event *event;
struct timerlat_entry *entry;
@@ -591,8 +588,7 @@ __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buf
entry->context = sample->context;
entry->timer_latency = sample->timer_latency;
- if (!call_filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit_nostack(buffer, event);
+ trace_buffer_unlock_commit_nostack(buffer, event);
}
/*
@@ -654,7 +650,6 @@ static void timerlat_save_stack(int skip)
static void
__timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
{
- struct trace_event_call *call = &event_osnoise;
struct ring_buffer_event *event;
struct stack_entry *entry;
@@ -668,8 +663,7 @@ __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, u
memcpy(&entry->caller, fstack->calls, size);
entry->size = fstack->nr_entries;
- if (!call_filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit_nostack(buffer, event);
+ trace_buffer_unlock_commit_nostack(buffer, event);
}
/*
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 868f2f912f28..e08aee34ef63 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -460,7 +460,6 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
(entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' :
(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
bh_off ? 'b' :
- (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
'.';
switch (entry->flags & (TRACE_FLAG_NEED_RESCHED |
@@ -1246,6 +1245,10 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
break;
trace_seq_puts(s, " => ");
+ if ((*p) == FTRACE_TRAMPOLINE_MARKER) {
+ trace_seq_puts(s, "[FTRACE TRAMPOLINE]\n");
+ continue;
+ }
seq_print_ip_sym(s, (*p) + delta, flags);
trace_seq_putc(s, '\n');
}
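[note] The stack-trace printer now recognises a sentinel value stored in the saved call stack and prints "[FTRACE TRAMPOLINE]" instead of trying to symbolize it, since a trampoline address has no stable symbol. A runnable sketch of the same sentinel-filtering idea; the marker value and the address formatting here are placeholders, not the kernel's:

#include <stdio.h>

#define TRAMPOLINE_MARKER ((unsigned long)-1)   /* placeholder sentinel */

static void print_stack(const unsigned long *calls, int nr)
{
        for (int i = 0; i < nr; i++) {
                printf(" => ");
                if (calls[i] == TRAMPOLINE_MARKER) {
                        printf("[FTRACE TRAMPOLINE]\n");
                        continue;
                }
                printf("%#lx\n", calls[i]);     /* would be a symbol lookup */
        }
}

int main(void)
{
        unsigned long stack[] = { 0x1234, TRAMPOLINE_MARKER, 0x5678 };

        print_stack(stack, 3);
        return 0;
}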
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
index e37446f7916e..5c03633316a6 100644
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -15,20 +15,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/preemptirq.h>
-/*
- * Use regular trace points on architectures that implement noinstr
- * tooling: these calls will only happen with RCU enabled, which can
- * use a regular tracepoint.
- *
- * On older architectures, use the rcuidle tracing methods (which
- * aren't NMI-safe - so exclude NMI contexts):
- */
-#ifdef CONFIG_ARCH_WANTS_NO_INSTR
-#define trace(point) trace_##point
-#else
-#define trace(point) if (!in_nmi()) trace_##point##_rcuidle
-#endif
-
#ifdef CONFIG_TRACE_IRQFLAGS
/* Per-cpu variable to prevent redundant calls when IRQs already off */
static DEFINE_PER_CPU(int, tracing_irq_cpu);
@@ -42,7 +28,7 @@ static DEFINE_PER_CPU(int, tracing_irq_cpu);
void trace_hardirqs_on_prepare(void)
{
if (this_cpu_read(tracing_irq_cpu)) {
- trace(irq_enable)(CALLER_ADDR0, CALLER_ADDR1);
+ trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1);
tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
this_cpu_write(tracing_irq_cpu, 0);
}
@@ -53,7 +39,7 @@ NOKPROBE_SYMBOL(trace_hardirqs_on_prepare);
void trace_hardirqs_on(void)
{
if (this_cpu_read(tracing_irq_cpu)) {
- trace(irq_enable)(CALLER_ADDR0, CALLER_ADDR1);
+ trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1);
tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
this_cpu_write(tracing_irq_cpu, 0);
}
@@ -75,7 +61,7 @@ void trace_hardirqs_off_finish(void)
if (!this_cpu_read(tracing_irq_cpu)) {
this_cpu_write(tracing_irq_cpu, 1);
tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
- trace(irq_disable)(CALLER_ADDR0, CALLER_ADDR1);
+ trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1);
}
}
@@ -89,7 +75,7 @@ void trace_hardirqs_off(void)
if (!this_cpu_read(tracing_irq_cpu)) {
this_cpu_write(tracing_irq_cpu, 1);
tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
- trace(irq_disable)(CALLER_ADDR0, CALLER_ADDR1);
+ trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1);
}
}
EXPORT_SYMBOL(trace_hardirqs_off);
@@ -100,13 +86,13 @@ NOKPROBE_SYMBOL(trace_hardirqs_off);
void trace_preempt_on(unsigned long a0, unsigned long a1)
{
- trace(preempt_enable)(a0, a1);
+ trace_preempt_enable(a0, a1);
tracer_preempt_on(a0, a1);
}
void trace_preempt_off(unsigned long a0, unsigned long a1)
{
- trace(preempt_disable)(a0, a1);
+ trace_preempt_disable(a0, a1);
tracer_preempt_off(a0, a1);
}
#endif
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 8a407adb0e1c..573b5d8e8a28 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -187,7 +187,7 @@ static inline char *get_saved_cmdlines(int idx)
static inline void set_cmdline(int idx, const char *cmdline)
{
- strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
+ strscpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}
static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index ae2ace5e515a..d6c7f18daa15 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -378,7 +378,6 @@ tracing_sched_switch_trace(struct trace_array *tr,
struct task_struct *next,
unsigned int trace_ctx)
{
- struct trace_event_call *call = &event_context_switch;
struct trace_buffer *buffer = tr->array_buffer.buffer;
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
@@ -396,8 +395,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
entry->next_state = task_state_index(next);
entry->next_cpu = task_cpu(next);
- if (!call_filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit(tr, buffer, event, trace_ctx);
+ trace_buffer_unlock_commit(tr, buffer, event, trace_ctx);
}
static void
@@ -406,7 +404,6 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
struct task_struct *curr,
unsigned int trace_ctx)
{
- struct trace_event_call *call = &event_wakeup;
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
struct trace_buffer *buffer = tr->array_buffer.buffer;
@@ -424,8 +421,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
entry->next_state = task_state_index(wakee);
entry->next_cpu = task_cpu(wakee);
- if (!call_filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit(tr, buffer, event, trace_ctx);
+ trace_buffer_unlock_commit(tr, buffer, event, trace_ctx);
}
static void notrace
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 1469dd8075fa..38b5754790c9 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
case TRACE_PRINT:
case TRACE_BRANCH:
case TRACE_GRAPH_ENT:
+ case TRACE_GRAPH_RETADDR_ENT:
case TRACE_GRAPH_RET:
return 1;
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 785733245ead..46aab0ab9350 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -299,6 +299,13 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
int syscall_nr;
int size;
+ /*
+ * Syscall probe called with preemption enabled, but the ring
+ * buffer and per-cpu data require preemption to be disabled.
+ */
+ might_fault();
+ guard(preempt_notrace)();
+
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
@@ -338,6 +345,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
struct trace_event_buffer fbuffer;
int syscall_nr;
+ /*
+ * Syscall probe called with preemption enabled, but the ring
+ * buffer and per-cpu data require preemption to be disabled.
+ */
+ might_fault();
+ guard(preempt_notrace)();
+
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
@@ -584,6 +598,13 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
int rctx;
int size;
+ /*
+ * Syscall probe called with preemption enabled, but the ring
+ * buffer and per-cpu data require preemption to be disabled.
+ */
+ might_fault();
+ guard(preempt_notrace)();
+
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
@@ -686,6 +707,13 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
int rctx;
int size;
+ /*
+ * Syscall probe called with preemption enabled, but the ring
+ * buffer and per-cpu data require preemption to be disabled.
+ */
+ might_fault();
+ guard(preempt_notrace)();
+
syscall_nr = trace_get_syscall_nr(current, regs);
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
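[note] Each syscall trace/perf probe now annotates that it may fault (might_fault(), legal only while sleeping is still allowed) and then takes guard(preempt_notrace)(), a scope-based guard that disables preemption for the rest of the function and re-enables it automatically on every return path. A hedged kernel-style sketch of what the guard replaces; the probe bodies are elided:

/* Scope-based form used above: */
static void probe_with_guard(void)
{
        might_fault();                  /* must precede the guard: still sleepable */
        guard(preempt_notrace)();       /* preemption off until return */

        /* ... per-cpu and ring-buffer work; any early return is safe ... */
}

/* Equivalent open-coded form it replaces: */
static void probe_open_coded(void)
{
        preempt_disable_notrace();

        /* ... same work, but every return path must undo the disable ... */

        preempt_enable_notrace();
}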
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index b30fc8fcd095..fed382b7881b 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -89,9 +89,11 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev)
static int register_uprobe_event(struct trace_uprobe *tu);
static int unregister_uprobe_event(struct trace_uprobe *tu);
-static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
+static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
+ __u64 *data);
static int uretprobe_dispatcher(struct uprobe_consumer *con,
- unsigned long func, struct pt_regs *regs);
+ unsigned long func, struct pt_regs *regs,
+ __u64 *data);
#ifdef CONFIG_STACK_GROWSUP
static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
@@ -1522,7 +1524,8 @@ trace_uprobe_register(struct trace_event_call *event, enum trace_reg type,
}
}
-static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
+static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
+ __u64 *data)
{
struct trace_uprobe *tu;
struct uprobe_dispatch_data udd;
@@ -1553,7 +1556,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
}
static int uretprobe_dispatcher(struct uprobe_consumer *con,
- unsigned long func, struct pt_regs *regs)
+ unsigned long func, struct pt_regs *regs,
+ __u64 *data)
{
struct trace_uprobe *tu;
struct uprobe_dispatch_data udd;