aboutsummaryrefslogtreecommitdiff
path: root/tools/perf/bench/numa.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-15 11:17:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-15 11:17:15 -0700
commit713eee84720e6525bc5b65954c5087604a15f5e8 (patch)
treea95c09841561bf97e005bded0300e7e3d39da5b0 /tools/perf/bench/numa.c
parent50f6c7dbd973092d8e5f3c89f29eb4bea19fdebd (diff)
parent492e4edba6e2fc0620a69266d33f29c4a1f9ac1e (diff)
downloadlinux-713eee84720e6525bc5b65954c5087604a15f5e8.tar.gz
linux-713eee84720e6525bc5b65954c5087604a15f5e8.tar.bz2
linux-713eee84720e6525bc5b65954c5087604a15f5e8.zip
Merge tag 'perf-tools-2020-08-14' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull more perf tools updates from Arnaldo Carvalho de Melo: "Fixes: - Fixes for 'perf bench numa'. - Always memset source before memcpy in 'perf bench mem'. - Quote CC and CXX for their arguments to fix build in environments using those variables to pass more than just the compiler names. - Fix module symbol processing, addressing regression detected via "perf test". - Allow multiple probes in record+script_probe_vfs_getname.sh 'perf test' entry. Improvements: - Add script to autogenerate socket family name id->string table from copy of kernel header, used so far in 'perf trace'. - 'perf ftrace' improvements to provide similar options for this utility so that one can go from 'perf record', 'perf trace', etc to 'perf ftrace' just by changing the name of the subcommand. - Prefer new "sched:sched_waking" trace event when it exists in 'perf sched' post processing. - Update POWER9 metrics to utilize other metrics. - Fall back to querying debuginfod if debuginfo not found locally. Miscellaneous: - Sync various kvm headers with kernel sources" * tag 'perf-tools-2020-08-14' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (40 commits) perf ftrace: Make option description initials all capital letters perf build-ids: Fall back to debuginfod query if debuginfo not found perf bench numa: Remove dead code in parse_nodes_opt() perf stat: Update POWER9 metrics to utilize other metrics perf ftrace: Add change log perf: ftrace: Add set_tracing_options() to set all trace options perf ftrace: Add option --tid to filter by thread id perf ftrace: Add option -D/--delay to delay tracing perf: ftrace: Allow set graph depth by '--graph-opts' perf ftrace: Add support for trace option tracing_thresh perf ftrace: Add option 'verbose' to show more info for graph tracer perf ftrace: Add support for tracing option 'irq-info' perf ftrace: Add support for trace option funcgraph-irqs perf ftrace: Add support for trace option sleep-time perf ftrace: Add support for tracing option 'func_stack_trace' perf tools: Add general function to parse sublevel options perf ftrace: Add option '--inherit' to trace children processes perf ftrace: Show trace column header perf ftrace: Add option '-m/--buffer-size' to set per-cpu buffer size perf ftrace: Factor out function write_tracing_file_int() ...
Diffstat (limited to 'tools/perf/bench/numa.c')
-rw-r--r--tools/perf/bench/numa.c77
1 files changed, 39 insertions, 38 deletions
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 5797253b9700..f85bceccc459 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -247,17 +247,22 @@ static int is_node_present(int node)
*/
static bool node_has_cpus(int node)
{
- struct bitmask *cpu = numa_allocate_cpumask();
- unsigned int i;
+ struct bitmask *cpumask = numa_allocate_cpumask();
+ bool ret = false; /* fall back to nocpus */
+ int cpu;
- if (cpu && !numa_node_to_cpus(node, cpu)) {
- for (i = 0; i < cpu->size; i++) {
- if (numa_bitmask_isbitset(cpu, i))
- return true;
+ BUG_ON(!cpumask);
+ if (!numa_node_to_cpus(node, cpumask)) {
+ for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+ if (numa_bitmask_isbitset(cpumask, cpu)) {
+ ret = true;
+ break;
+ }
}
}
+ numa_free_cpumask(cpumask);
- return false; /* lets fall back to nocpus safely */
+ return ret;
}
static cpu_set_t bind_to_cpu(int target_cpu)
@@ -288,14 +293,10 @@ static cpu_set_t bind_to_cpu(int target_cpu)
static cpu_set_t bind_to_node(int target_node)
{
- int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
cpu_set_t orig_mask, mask;
int cpu;
int ret;
- BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
- BUG_ON(!cpus_per_node);
-
ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
BUG_ON(ret);
@@ -305,13 +306,16 @@ static cpu_set_t bind_to_node(int target_node)
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &mask);
} else {
- int cpu_start = (target_node + 0) * cpus_per_node;
- int cpu_stop = (target_node + 1) * cpus_per_node;
+ struct bitmask *cpumask = numa_allocate_cpumask();
- BUG_ON(cpu_stop > g->p.nr_cpus);
-
- for (cpu = cpu_start; cpu < cpu_stop; cpu++)
- CPU_SET(cpu, &mask);
+ BUG_ON(!cpumask);
+ if (!numa_node_to_cpus(target_node, cpumask)) {
+ for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+ if (numa_bitmask_isbitset(cpumask, cpu))
+ CPU_SET(cpu, &mask);
+ }
+ }
+ numa_free_cpumask(cpumask);
}
ret = sched_setaffinity(0, sizeof(mask), &mask);
@@ -729,8 +733,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
return -1;
return parse_node_list(arg);
-
- return 0;
}
#define BIT(x) (1ul << x)
@@ -813,12 +815,12 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val
}
}
} else if (!g->p.data_backwards || (nr + loop) & 1) {
+ /* Process data forwards: */
d0 = data + off;
d = data + off + 1;
d1 = data + words;
- /* Process data forwards: */
for (;;) {
if (unlikely(d >= d1))
d = data;
@@ -836,7 +838,6 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val
d = data + off - 1;
d1 = data + words;
- /* Process data forwards: */
for (;;) {
if (unlikely(d < data))
d = data + words-1;
@@ -1733,12 +1734,12 @@ err:
*/
static const char *tests[][MAX_ARGS] = {
/* Basic single-stream NUMA bandwidth measurements: */
- { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "0", OPT_BW_RAM },
{ "RAM-bw-local-NOTHP,",
"mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP },
- { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "1", OPT_BW_RAM },
/* 2-stream NUMA bandwidth measurements: */
@@ -1755,7 +1756,7 @@ static const char *tests[][MAX_ARGS] = {
{ " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV },
{ " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV },
{ " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV },
- { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
+ { " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV },
{ " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
{ " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV },
{ " 4x4-convergence-NOTHP,",
@@ -1780,24 +1781,24 @@ static const char *tests[][MAX_ARGS] = {
"mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
{ "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW },
- { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
- { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
- { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
- { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
+ { " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
+ { " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
+ { "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
+ { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
- { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
- { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
- { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
- { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
- { " 4x8-bw-thread-NOTHP,",
+ { " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
+ { " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
+ { " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
+ { " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
+ { " 4x8-bw-process-NOTHP,",
"mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP },
- { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
- { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
+ { " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
+ { " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
- { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
- { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
+ { "2x16-bw-process,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
+ { "1x32-bw-process,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
- { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
+ { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
{ "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP },
{ "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW },
{ "numa01-bw-thread-NOTHP,",