From 40f20e5074b035c7111e135aa939d1d1a96a2480 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Mon, 16 May 2016 04:51:19 +0000 Subject: perf script: Show call graphs when 1st event doesn't have it but some other has There's a display inconsistency when there are multiple tracepoint events, some of which have the 'call-graph' config option set but the first one hasn't, i.e. the whole logic for call graph processing is enabled only if the first tracepoint event has call-graph set. For instance, if we record signal_deliver with call-graph and signal_generate without: $ perf record -g -a -e signal:signal_deliver -e signal:signal_generate/call-graph=no/ [ perf record: Captured and wrote 0.017 MB perf.data (2 samples) ] $ perf script kworker/u2:1 13 [000] 6563.875949: signal:signal_generate: sig=2 errno=0 code=128 comm=perf pid=1313 grp=1 res=0 ff61cc __send_signal+0x3ec ([kernel.kallsyms]) perf 1313 [000] 6563.877584: signal:signal_deliver: sig=2 errno=0 code=128 sa_handler=43115e sa_flags=14000000 7ffff314 get_signal+0x80007f0023a4 ([kernel.kallsyms]) 7fffe358 do_signal+0x80007f002028 ([kernel.kallsyms]) 7fffa5e8 exit_to_usermode_loop+0x80007f002053 ([kernel.kallsyms]) ... Then we exchange the order of these two events in commandline, and keep signal_generate without call-graph. $ perf record -g -a -e signal:signal_generate/call-graph=no/ -e signal:signal_deliver [ perf record: Captured and wrote 0.017 MB perf.data (2 samples) ] $ perf script kworker/u2:2 1314 [000] 6933.353060: signal:signal_generate: sig=2 errno=0 code=128 comm=perf pid=1321 grp=1 res=0 perf 1321 [000] 6933.353872: signal:signal_deliver: sig=2 errno=0 code=128 sa_handler=43115e sa_flags=14000000 This time, the callchain of the event signal_deliver disappeared. The problem is caused by that perf only checks for the first evsel in evlist and decides if callchain should be printed. This patch traverses all evsels in evlist to see if any of them have callchains, and shows the right result: $ perf script kworker/u2:2 1314 [000] 6933.353060: signal:signal_generate: sig=2 errno=0 code=128 comm=perf pid=1321 grp=1 res=0 ff61cc __send_signal+0x3ec ([kernel.kallsyms]) perf 1321 [000] 6933.353872: signal:signal_deliver: sig=2 errno=0 code=128 sa_handler=43115e sa_flags=14000000 7ffff314 get_signal+0x80007f0023a4 ([kernel.kallsyms]) 7fffe358 do_signal+0x80007f002028 ([kernel.kallsyms]) 7fffa5e8 exit_to_usermode_loop+0x80007f002053 ([kernel.kallsyms]) ... Signed-off-by: He Kuang Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/1463374279-97209-1-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e3ce2f34d3ad..46011235af5d 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -339,7 +339,7 @@ static void set_print_ip_opts(struct perf_event_attr *attr) */ static int perf_session__check_output_opt(struct perf_session *session) { - int j; + unsigned int j; struct perf_evsel *evsel; for (j = 0; j < PERF_TYPE_MAX; ++j) { @@ -388,17 +388,20 @@ static int perf_session__check_output_opt(struct perf_session *session) struct perf_event_attr *attr; j = PERF_TYPE_TRACEPOINT; - evsel = perf_session__find_first_evtype(session, j); - if (evsel == NULL) - goto out; - attr = &evsel->attr; + evlist__for_each(session->evlist, evsel) { + if (evsel->attr.type != j) + continue; + + attr = &evsel->attr; - if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) { - output[j].fields |= PERF_OUTPUT_IP; - output[j].fields |= PERF_OUTPUT_SYM; - output[j].fields |= PERF_OUTPUT_DSO; - set_print_ip_opts(attr); + if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) { + output[j].fields |= PERF_OUTPUT_IP; + output[j].fields |= PERF_OUTPUT_SYM; + output[j].fields |= PERF_OUTPUT_DSO; + set_print_ip_opts(attr); + goto out; + } } } -- cgit From e5cadb93d0839d268a7c4199e0fdef0f94722117 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 23 Jun 2016 11:26:15 -0300 Subject: perf evlist: Rename for_each() macros to for_each_entry() To match the semantics for list.h in the kernel, that are used to implement those macros. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Taeung Song Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-qbcjlgj0ffxquxscahbpddi3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 46011235af5d..0e18e06e7fd5 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -369,7 +369,7 @@ static int perf_session__check_output_opt(struct perf_session *session) if (!no_callchain) { bool use_callchain = false; - evlist__for_each(session->evlist, evsel) { + evlist__for_each_entry(session->evlist, evsel) { if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { use_callchain = true; break; @@ -389,7 +389,7 @@ static int perf_session__check_output_opt(struct perf_session *session) j = PERF_TYPE_TRACEPOINT; - evlist__for_each(session->evlist, evsel) { + evlist__for_each_entry(session->evlist, evsel) { if (evsel->attr.type != j) continue; @@ -720,7 +720,7 @@ static int perf_evlist__max_name_len(struct perf_evlist *evlist) struct perf_evsel *evsel; int max = 0; - evlist__for_each(evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { int len = strlen(perf_evsel__name(evsel)); max = MAX(len, max); @@ -945,7 +945,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, if (evsel->attr.type >= PERF_TYPE_MAX) return 0; - evlist__for_each(evlist, pos) { + evlist__for_each_entry(evlist, pos) { if (pos->attr.type == evsel->attr.type && pos != evsel) return 0; } @@ -1671,7 +1671,7 @@ static int check_ev_match(char *dir_name, char *scriptname, snprintf(evname, len + 1, "%s", p); match = 0; - evlist__for_each(session->evlist, pos) { + evlist__for_each_entry(session->evlist, pos) { if (!strcmp(perf_evsel__name(pos), evname)) { match = 1; break; @@ -1873,7 +1873,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, struct stat_round_event *round = &event->stat_round; struct perf_evsel *counter; - evlist__for_each(session->evlist, counter) { + evlist__for_each_entry(session->evlist, counter) { perf_stat_process_counter(&stat_config, counter); process_stat(counter, round->time); } -- cgit From 055cd33d93ac310ce68e8b1ebcf88f829426adf5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Jun 2016 16:40:56 +0300 Subject: perf script: Print sample flags more nicely The flags field is synthesized and may have a value when Instruction Trace decoding. The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, system, asynchronous, interrupt, transaction abort, trace begin, trace end, and in transaction, respectively. Change the display so that known combinations of flags are printed more nicely e.g.: "call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b", "int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs", "async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB", "tr end" for "bE". However the "x" flag will be displayed separately in those cases e.g. "jcc (x)" for a condition branch within a transaction. Example: perf record -e intel_pt//u ls perf script --ns -F comm,cpu,pid,tid,time,ip,addr,sym,dso,symoff,flags ... ls 3689/3689 [001] 2062.020965237: jcc 7f06a958847a _dl_sysdep_start+0xfa (/lib/x86_64-linux-gnu/ld-2.19.so) => 7f06a9588450 _dl_sysdep_start+0xd0 (/lib/x86_64-linux-gnu/ld-2.19.so) ls 3689/3689 [001] 2062.020965237: jmp 7f06a9588461 _dl_sysdep_start+0xe1 (/lib/x86_64-linux-gnu/ld-2.19.so) => 7f06a95885a0 _dl_sysdep_start+0x220 (/lib/x86_64-linux-gnu/ld-2.19.so) ls 3689/3689 [001] 2062.020965237: jmp 7f06a95885a4 _dl_sysdep_start+0x224 (/lib/x86_64-linux-gnu/ld-2.19.so) => 7f06a9588470 _dl_sysdep_start+0xf0 (/lib/x86_64-linux-gnu/ld-2.19.so) ls 3689/3689 [001] 2062.020965904: call 7f06a95884c3 _dl_sysdep_start+0x143 (/lib/x86_64-linux-gnu/ld-2.19.so) => 7f06a9589140 brk+0x0 (/lib/x86_64-linux-gnu/ld-2.19.so) ls 3689/3689 [001] 2062.020965904: syscall 7f06a958914a brk+0xa (/lib/x86_64-linux-gnu/ld-2.19.so) => 0 [unknown] ([unknown]) ls 3689/3689 [001] 2062.020966237: tr strt 0 [unknown] ([unknown]) => 7f06a958914c brk+0xc (/lib/x86_64-linux-gnu/ld-2.19.so) ls 3689/3689 [001] 2062.020966237: return 7f06a9589165 brk+0x25 (/lib/x86_64-linux-gnu/ld-2.19.so) => 7f06a95884c8 _dl_sysdep_start+0x148 (/lib/x86_64-linux-gnu/ld-2.19.so) ls 3689/3689 [001] 2062.020966237: jcc 7f06a95884d7 _dl_sysdep_start+0x157 (/lib/x86_64-linux-gnu/ld-2.19.so) => 7f06a95885f0 _dl_sysdep_start+0x270 (/lib/x86_64-linux-gnu/ld-2.19.so) ls 3689/3689 [001] 2062.020966237: call 7f06a95885f0 _dl_sysdep_start+0x270 (/lib/x86_64-linux-gnu/ld-2.19.so) => 7f06a958ac50 strlen+0x0 (/lib/x86_64-linux-gnu/ld-2.19.so) ls 3689/3689 [001] 2062.020966237: jcc 7f06a958ac6e strlen+0x1e (/lib/x86_64-linux-gnu/ld-2.19.so) => 7f06a958ac60 strlen+0x10 (/lib/x86_64-linux-gnu/ld-2.19.so) ... Signed-off-by: Adrian Hunter Acked-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1466689258-28493-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 0e18e06e7fd5..56b2fb8135ce 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -606,13 +606,42 @@ static void print_sample_bts(struct perf_sample *sample, printf("\n"); } +static struct { + u32 flags; + const char *name; +} sample_flags[] = { + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "jcc"}, + {PERF_IP_FLAG_BRANCH, "jmp"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT, "int"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT, "iret"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET, "syscall"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET, "sysret"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "async"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_INTERRUPT, "hw int"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "tx abrt"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "tr strt"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"}, + {0, NULL} +}; + static void print_sample_flags(u32 flags) { const char *chars = PERF_IP_FLAG_CHARS; const int n = strlen(PERF_IP_FLAG_CHARS); + bool in_tx = flags & PERF_IP_FLAG_IN_TX; + const char *name = NULL; char str[33]; int i, pos = 0; + for (i = 0; sample_flags[i].name ; i++) { + if (sample_flags[i].flags == (flags & ~PERF_IP_FLAG_IN_TX)) { + name = sample_flags[i].name; + break; + } + } + for (i = 0; i < n; i++, flags >>= 1) { if (flags & 1) str[pos++] = chars[i]; @@ -622,7 +651,11 @@ static void print_sample_flags(u32 flags) str[pos++] = '?'; } str[pos] = 0; - printf(" %-4s ", str); + + if (name) + printf(" %-7s%4s ", name, in_tx ? "(x)" : ""); + else + printf(" %-11s ", str); } struct printer_data { -- cgit From e216708d982a1c262f411fee2fcac2bd9ec93a32 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Jun 2016 16:40:58 +0300 Subject: perf script: Add callindent option Based on patches from Andi Kleen. When printing PT instruction traces with perf script it is rather useful to see some indentation for the call tree. This patch adds a new callindent field to perf script that prints spaces for the function call stack depth. We already have code to track the function call stack for PT, that we can reuse with minor modifications. The resulting output is not quite as nice as ftrace yet, but a lot better than what was there before. Note there are some corner cases when the thread stack gets code confused and prints incorrect indentation. Even with that it is fairly useful. When displaying kernel code traces it is recommended to run as root, as otherwise perf doesn't understand the kernel addresses properly, and may not reset the call stack correctly on kernel boundaries. Example output: sudo perf-with-kcore record eg2 -a -e intel_pt// -- sleep 1 sudo perf-with-kcore script eg2 --ns -F callindent,time,comm,pid,sym,ip,addr,flags,cpu --itrace=cre | less ... swapper 0 [000] 5830.389116586: call irq_exit ffffffff8104d620 smp_call_function_single_interrupt+0x30 => ffffffff8107e720 irq_exit swapper 0 [000] 5830.389116586: call idle_cpu ffffffff8107e769 irq_exit+0x49 => ffffffff810a3970 idle_cpu swapper 0 [000] 5830.389116586: return idle_cpu ffffffff810a39b7 idle_cpu+0x47 => ffffffff8107e76e irq_exit swapper 0 [000] 5830.389116586: call tick_nohz_irq_exit ffffffff8107e7bd irq_exit+0x9d => ffffffff810f2fc0 tick_nohz_irq_exit swapper 0 [000] 5830.389116919: call __tick_nohz_idle_enter ffffffff810f2fe0 tick_nohz_irq_exit+0x20 => ffffffff810f28d0 __tick_nohz_idle_enter swapper 0 [000] 5830.389116919: call ktime_get ffffffff810f28f1 __tick_nohz_idle_enter+0x21 => ffffffff810e9ec0 ktime_get swapper 0 [000] 5830.389116919: call read_tsc ffffffff810e9ef6 ktime_get+0x36 => ffffffff81035070 read_tsc swapper 0 [000] 5830.389116919: return read_tsc ffffffff81035084 read_tsc+0x14 => ffffffff810e9efc ktime_get swapper 0 [000] 5830.389116919: return ktime_get ffffffff810e9f46 ktime_get+0x86 => ffffffff810f28f6 __tick_nohz_idle_enter swapper 0 [000] 5830.389116919: call sched_clock_idle_sleep_event ffffffff810f290b __tick_nohz_idle_enter+0x3b => ffffffff810a7380 sched_clock_idle_sleep_event swapper 0 [000] 5830.389116919: call sched_clock_cpu ffffffff810a738b sched_clock_idle_sleep_event+0xb => ffffffff810a72e0 sched_clock_cpu swapper 0 [000] 5830.389116919: call sched_clock ffffffff810a734d sched_clock_cpu+0x6d => ffffffff81035750 sched_clock swapper 0 [000] 5830.389116919: call native_sched_clock ffffffff81035754 sched_clock+0x4 => ffffffff81035640 native_sched_clock swapper 0 [000] 5830.389116919: return native_sched_clock ffffffff8103568c native_sched_clock+0x4c => ffffffff81035759 sched_clock swapper 0 [000] 5830.389116919: return sched_clock ffffffff8103575c sched_clock+0xc => ffffffff810a7352 sched_clock_cpu swapper 0 [000] 5830.389116919: return sched_clock_cpu ffffffff810a7356 sched_clock_cpu+0x76 => ffffffff810a7390 sched_clock_idle_sleep_event swapper 0 [000] 5830.389116919: return sched_clock_idle_sleep_event ffffffff810a7391 sched_clock_idle_sleep_event+0x11 => ffffffff810f2910 __tick_nohz_idle_enter ... Signed-off-by: Adrian Hunter Acked-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1466689258-28493-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 68 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 56b2fb8135ce..971ff91b16cb 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -21,6 +21,7 @@ #include "util/cpumap.h" #include "util/thread_map.h" #include "util/stat.h" +#include "util/thread-stack.h" #include #include #include "asm/bug.h" @@ -63,6 +64,7 @@ enum perf_output_field { PERF_OUTPUT_DATA_SRC = 1U << 17, PERF_OUTPUT_WEIGHT = 1U << 18, PERF_OUTPUT_BPF_OUTPUT = 1U << 19, + PERF_OUTPUT_CALLINDENT = 1U << 20, }; struct output_option { @@ -89,6 +91,7 @@ struct output_option { {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC}, {.str = "weight", .field = PERF_OUTPUT_WEIGHT}, {.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT}, + {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT}, }; /* default set to maintain compatibility with current format */ @@ -562,6 +565,62 @@ static void print_sample_addr(struct perf_sample *sample, } } +static void print_sample_callindent(struct perf_sample *sample, + struct perf_evsel *evsel, + struct thread *thread, + struct addr_location *al) +{ + struct perf_event_attr *attr = &evsel->attr; + size_t depth = thread_stack__depth(thread); + struct addr_location addr_al; + const char *name = NULL; + static int spacing; + int len = 0; + u64 ip = 0; + + /* + * The 'return' has already been popped off the stack so the depth has + * to be adjusted to match the 'call'. + */ + if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN) + depth += 1; + + if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) { + if (sample_addr_correlates_sym(attr)) { + thread__resolve(thread, &addr_al, sample); + if (addr_al.sym) + name = addr_al.sym->name; + else + ip = sample->addr; + } else { + ip = sample->addr; + } + } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) { + if (al->sym) + name = al->sym->name; + else + ip = sample->ip; + } + + if (name) + len = printf("%*s%s", (int)depth * 4, "", name); + else if (ip) + len = printf("%*s%16" PRIx64, (int)depth * 4, "", ip); + + if (len < 0) + return; + + /* + * Try to keep the output length from changing frequently so that the + * output lines up more nicely. + */ + if (len > spacing || (len && len < spacing - 52)) + spacing = round_up(len + 4, 32); + + if (len < spacing) + printf("%*s", spacing - len, ""); +} + static void print_sample_bts(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, @@ -570,6 +629,9 @@ static void print_sample_bts(struct perf_sample *sample, struct perf_event_attr *attr = &evsel->attr; bool print_srcline_last = false; + if (PRINT_FIELD(CALLINDENT)) + print_sample_callindent(sample, evsel, thread, al); + /* print branch_from information */ if (PRINT_FIELD(IP)) { unsigned int print_opts = output[attr->type].print_ip_opts; @@ -2053,7 +2115,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "comma separated output fields prepend with 'type:'. " "Valid types: hw,sw,trace,raw. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," - "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields), + "addr,symoff,period,iregs,brstack,brstacksym,flags," + "callindent", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", @@ -2292,6 +2355,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) script.session = session; script__setup_sample_type(&script); + if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) + itrace_synth_opts.thread_stack = true; + session->itrace_synth_opts = &itrace_synth_opts; if (cpu_list) { -- cgit