aboutsummaryrefslogtreecommitdiff
path: root/tools/perf/builtin-stat.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2017-12-28 16:16:24 +0100
committerIngo Molnar <mingo@kernel.org>2017-12-28 16:16:24 +0100
commita0a8f2ada3f50f0190ebcbb60e77e697444c470e (patch)
tree4967ea5026b1b9c228286540a059a85c667c01a1 /tools/perf/builtin-stat.c
parentfaaf95677f33dac910b6cbe917cabea43c8c1616 (diff)
parent5d4fd9c8b83b36d34521b3af361a5726899045bf (diff)
downloadlinux-a0a8f2ada3f50f0190ebcbb60e77e697444c470e.tar.gz
linux-a0a8f2ada3f50f0190ebcbb60e77e697444c470e.tar.bz2
linux-a0a8f2ada3f50f0190ebcbb60e77e697444c470e.zip
Merge tag 'perf-core-for-mingo-4.16-20171227' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: - Allow system wide 'perf stat --per-thread', sorting the result (Jin Yao) E.g.: [root@jouet ~]# perf stat --per-thread --metrics IPC ^C Performance counter stats for 'system wide': make-22229 23,012,094,032 inst_retired.any # 0.8 IPC cc1-22419 692,027,497 inst_retired.any # 0.8 IPC gcc-22418 328,231,855 inst_retired.any # 0.9 IPC cc1-22509 220,853,647 inst_retired.any # 0.8 IPC gcc-22486 199,874,810 inst_retired.any # 1.0 IPC as-22466 177,896,365 inst_retired.any # 0.9 IPC cc1-22465 150,732,374 inst_retired.any # 0.8 IPC gcc-22508 112,555,593 inst_retired.any # 0.9 IPC cc1-22487 108,964,079 inst_retired.any # 0.7 IPC qemu-system-x86-2697 21,330,550 inst_retired.any # 0.3 IPC systemd-journal-551 20,642,951 inst_retired.any # 0.4 IPC docker-containe-17651 9,552,892 inst_retired.any # 0.5 IPC dockerd-current-9809 7,528,586 inst_retired.any # 0.5 IPC make-22153 12,504,194,380 inst_retired.any # 0.8 IPC python2-22429 12,081,290,954 inst_retired.any # 0.8 IPC <SNIP> python2-22429 15,026,328,103 cpu_clk_unhalted.thread cc1-22419 826,660,193 cpu_clk_unhalted.thread gcc-22418 365,321,295 cpu_clk_unhalted.thread cc1-22509 279,169,362 cpu_clk_unhalted.thread gcc-22486 210,156,950 cpu_clk_unhalted.thread <SNIP> 5.638075538 seconds time elapsed [root@jouet ~]# - Improve shell auto-completion of perf events (Jin Yao) - Fix symbol fixup issues in arm64 due to ELF type (Kim Phillips) - Ignore threads when they vanish after procfs based enumeration and before we try to use them with sys_perf_event_open(), i.e. just remove them from the thread_map and continue with the rest. This makes, among other cases, the previous new feature (perf stat --per-thread for system wide, albeit that not seeming to be the motivation for this patch) more robust. 
(Mengting Zhang) - Generate s390 syscall table from asm/unistd.h, doing like x86, removing the dependency on audit-libs to do this id->string translation, speeding up the support for newly introduced syscalls (Hendrik Brueckner) - Fix 'perf test' on filesystems where readdir() returns d_type == DT_UNKNOWN, such as XFS (Jiri Olsa) - Fix PERF_SAMPLE_RAW_DATA endianness handling for cross-arch tracepoint processing (Jiri Olsa) - Add __return suffix for return events in 'perf probe', streamlining entry/exit tracing (Masami Hiramatsu) - Improve support for versioned symbols in 'perf probe' (Masami Hiramatsu) - Clarify error message about invalid 'perf probe' event names (Masami Hiramatsu) - Fix check open filename arg using 'perf trace' in a 'perf test' entry for systems using glibc >= 2.26, such as some ARM and s390 distros (Michael Petlan) - Make method for obtaining the (normalized) architecture id for a perf.data file or for the running system used by the annotation routines generally available, next user will be for generating per arch errno string tables to allow for pretty printing errno codes recorded in a perf.data file in architecture A to be properly decoded on hardware architecture B. (Arnaldo Carvalho de Melo) - Remove duplicate includes, found using scripts/checkincludes.pl (Pravin Shedge) - s390 needs -fPIC, enable it, also revert a patch that supposedly did that but instead enabled -fPIC for x86 (Hendrik Brueckner, Arnaldo Carvalho de Melo) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r--tools/perf/builtin-stat.c168
1 files changed, 143 insertions, 25 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a027b4712e48..98bf9d32f222 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -63,7 +63,6 @@
#include "util/group.h"
#include "util/session.h"
#include "util/tool.h"
-#include "util/group.h"
#include "util/string2.h"
#include "util/metricgroup.h"
#include "asm/bug.h"
@@ -214,8 +213,13 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a,
static void perf_stat__reset_stats(void)
{
+ int i;
+
perf_evlist__reset_stats(evsel_list);
perf_stat__reset_shadow_stats();
+
+ for (i = 0; i < stat_config.stats_num; i++)
+ perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
}
static int create_perf_stat_counter(struct perf_evsel *evsel)
@@ -272,7 +276,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
attr->enable_on_exec = 1;
}
- if (target__has_cpu(&target))
+ if (target__has_cpu(&target) && !target__has_per_thread(&target))
return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
return perf_evsel__open_per_thread(evsel, evsel_list->threads);
@@ -335,7 +339,7 @@ static int read_counter(struct perf_evsel *counter)
int nthreads = thread_map__nr(evsel_list->threads);
int ncpus, cpu, thread;
- if (target__has_cpu(&target))
+ if (target__has_cpu(&target) && !target__has_per_thread(&target))
ncpus = perf_evsel__nr_cpus(counter);
else
ncpus = 1;
@@ -1097,7 +1101,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
}
static void printout(int id, int nr, struct perf_evsel *counter, double uval,
- char *prefix, u64 run, u64 ena, double noise)
+ char *prefix, u64 run, u64 ena, double noise,
+ struct runtime_stat *st)
{
struct perf_stat_output_ctx out;
struct outstate os = {
@@ -1190,7 +1195,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
perf_stat__print_shadow_stats(counter, uval,
first_shadow_cpu(counter, id),
- &out, &metric_events);
+ &out, &metric_events, st);
if (!csv_output && !metric_only) {
print_noise(counter, noise);
print_running(run, ena);
@@ -1214,7 +1219,8 @@ static void aggr_update_shadow(void)
val += perf_counts(counter->counts, cpu, 0)->val;
}
perf_stat__update_shadow_stats(counter, val,
- first_shadow_cpu(counter, id));
+ first_shadow_cpu(counter, id),
+ &rt_stat);
}
}
}
@@ -1334,7 +1340,8 @@ static void print_aggr(char *prefix)
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- printout(id, nr, counter, uval, prefix, run, ena, 1.0);
+ printout(id, nr, counter, uval, prefix, run, ena, 1.0,
+ &rt_stat);
if (!metric_only)
fputc('\n', output);
}
@@ -1343,13 +1350,24 @@ static void print_aggr(char *prefix)
}
}
-static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
+static int cmp_val(const void *a, const void *b)
{
- FILE *output = stat_config.output;
- int nthreads = thread_map__nr(counter->threads);
- int ncpus = cpu_map__nr(counter->cpus);
- int cpu, thread;
+ return ((struct perf_aggr_thread_value *)b)->val -
+ ((struct perf_aggr_thread_value *)a)->val;
+}
+
+static struct perf_aggr_thread_value *sort_aggr_thread(
+ struct perf_evsel *counter,
+ int nthreads, int ncpus,
+ int *ret)
+{
+ int cpu, thread, i = 0;
double uval;
+ struct perf_aggr_thread_value *buf;
+
+ buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value));
+ if (!buf)
+ return NULL;
for (thread = 0; thread < nthreads; thread++) {
u64 ena = 0, run = 0, val = 0;
@@ -1360,13 +1378,63 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
run += perf_counts(counter->counts, cpu, thread)->run;
}
+ uval = val * counter->scale;
+
+ /*
+ * Skip value 0 when enabling --per-thread globally,
+ * otherwise too many 0 output.
+ */
+ if (uval == 0.0 && target__has_per_thread(&target))
+ continue;
+
+ buf[i].counter = counter;
+ buf[i].id = thread;
+ buf[i].uval = uval;
+ buf[i].val = val;
+ buf[i].run = run;
+ buf[i].ena = ena;
+ i++;
+ }
+
+ qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val);
+
+ if (ret)
+ *ret = i;
+
+ return buf;
+}
+
+static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
+{
+ FILE *output = stat_config.output;
+ int nthreads = thread_map__nr(counter->threads);
+ int ncpus = cpu_map__nr(counter->cpus);
+ int thread, sorted_threads, id;
+ struct perf_aggr_thread_value *buf;
+
+ buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads);
+ if (!buf) {
+ perror("cannot sort aggr thread");
+ return;
+ }
+
+ for (thread = 0; thread < sorted_threads; thread++) {
if (prefix)
fprintf(output, "%s", prefix);
- uval = val * counter->scale;
- printout(thread, 0, counter, uval, prefix, run, ena, 1.0);
+ id = buf[thread].id;
+ if (stat_config.stats)
+ printout(id, 0, buf[thread].counter, buf[thread].uval,
+ prefix, buf[thread].run, buf[thread].ena, 1.0,
+ &stat_config.stats[id]);
+ else
+ printout(id, 0, buf[thread].counter, buf[thread].uval,
+ prefix, buf[thread].run, buf[thread].ena, 1.0,
+ &rt_stat);
fputc('\n', output);
}
+
+ free(buf);
}
struct caggr_data {
@@ -1401,7 +1469,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
fprintf(output, "%s", prefix);
uval = cd.avg * counter->scale;
- printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg);
+ printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled,
+ cd.avg, &rt_stat);
if (!metric_only)
fprintf(output, "\n");
}
@@ -1440,7 +1509,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
+ printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
+ &rt_stat);
fputc('\n', output);
}
@@ -1472,7 +1542,8 @@ static void print_no_aggr_metric(char *prefix)
run = perf_counts(counter->counts, cpu, 0)->run;
uval = val * counter->scale;
- printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
+ printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
+ &rt_stat);
}
fputc('\n', stat_config.output);
}
@@ -1528,7 +1599,8 @@ static void print_metric_headers(const char *prefix, bool no_indent)
perf_stat__print_shadow_stats(counter, 0,
0,
&out,
- &metric_events);
+ &metric_events,
+ &rt_stat);
}
fputc('\n', stat_config.output);
}
@@ -2487,6 +2559,35 @@ int process_cpu_map_event(struct perf_tool *tool,
return set_maps(st);
}
+static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
+{
+ int i;
+
+ config->stats = calloc(nthreads, sizeof(struct runtime_stat));
+ if (!config->stats)
+ return -1;
+
+ config->stats_num = nthreads;
+
+ for (i = 0; i < nthreads; i++)
+ runtime_stat__init(&config->stats[i]);
+
+ return 0;
+}
+
+static void runtime_stat_delete(struct perf_stat_config *config)
+{
+ int i;
+
+ if (!config->stats)
+ return;
+
+ for (i = 0; i < config->stats_num; i++)
+ runtime_stat__exit(&config->stats[i]);
+
+ free(config->stats);
+}
+
static const char * const stat_report_usage[] = {
"perf stat report [<options>]",
NULL,
@@ -2696,12 +2797,16 @@ int cmd_stat(int argc, const char **argv)
run_count = 1;
}
- if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
- fprintf(stderr, "The --per-thread option is only available "
- "when monitoring via -p -t options.\n");
- parse_options_usage(NULL, stat_options, "p", 1);
- parse_options_usage(NULL, stat_options, "t", 1);
- goto out;
+ if ((stat_config.aggr_mode == AGGR_THREAD) &&
+ !target__has_task(&target)) {
+ if (!target.system_wide || target.cpu_list) {
+ fprintf(stderr, "The --per-thread option is only "
+ "available when monitoring via -p -t -a "
+ "options or only --per-thread.\n");
+ parse_options_usage(NULL, stat_options, "p", 1);
+ parse_options_usage(NULL, stat_options, "t", 1);
+ goto out;
+ }
}
/*
@@ -2725,6 +2830,9 @@ int cmd_stat(int argc, const char **argv)
target__validate(&target);
+ if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
+ target.per_thread = true;
+
if (perf_evlist__create_maps(evsel_list, &target) < 0) {
if (target__has_task(&target)) {
pr_err("Problems finding threads of monitor\n");
@@ -2742,8 +2850,15 @@ int cmd_stat(int argc, const char **argv)
* Initialize thread_map with comm names,
* so we could print it out on output.
*/
- if (stat_config.aggr_mode == AGGR_THREAD)
+ if (stat_config.aggr_mode == AGGR_THREAD) {
thread_map__read_comms(evsel_list->threads);
+ if (target.system_wide) {
+ if (runtime_stat_new(&stat_config,
+ thread_map__nr(evsel_list->threads))) {
+ goto out;
+ }
+ }
+ }
if (interval && interval < 100) {
if (interval < 10) {
@@ -2833,5 +2948,8 @@ out:
sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
perf_evlist__delete(evsel_list);
+
+ runtime_stat_delete(&stat_config);
+
return status;
}