From 38640c480939d56cc8b03d58642fc5261761a697 Mon Sep 17 00:00:00 2001 From: Dirk Müller Date: Wed, 5 Jan 2022 17:38:46 +0100 Subject: lib/raid6: skip benchmark of non-chosen xor_syndrome functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit fe5cbc6e06c7 ("md/raid6 algorithms: delta syndrome functions") a xor_syndrome() benchmarking was added also to the raid6_choose_gen() function. However, the results of that benchmarking were intentionally discarded and did not influence the choice. It picked the xor_syndrome() variant related to the best performing gen_syndrome(). Reduce runtime of raid6_choose_gen() without modifying its outcome by only benchmarking the xor_syndrome() of the best gen_syndrome() variant. For a HZ=250 x86_64 system with avx2 and without avx512 this removes 5 out of 6 xor() benchmarks, saving 340ms of raid6 initialization time. Signed-off-by: Dirk Müller Signed-off-by: Song Liu --- lib/raid6/algos.c | 76 +++++++++++++++++++++++++++---------------------------- 1 file changed, 37 insertions(+), 39 deletions(-) (limited to 'lib/raid6/algos.c') diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 6d5e5000fdd7..9b7e8a837b27 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -145,12 +145,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) static inline const struct raid6_calls *raid6_choose_gen( void *(*const dptrs)[RAID6_TEST_DISKS], const int disks) { - unsigned long perf, bestgenperf, bestxorperf, j0, j1; + unsigned long perf, bestgenperf, j0, j1; int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ const struct raid6_calls *const *algo; const struct raid6_calls *best; - for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { + for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { if (!best || (*algo)->prefer >= best->prefer) { if ((*algo)->valid && !(*algo)->valid()) continue; 
@@ -180,50 +180,48 @@ static inline const struct raid6_calls *raid6_choose_gen( pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, (perf * HZ * (disks-2)) >> (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); + } + } - if (!(*algo)->xor_syndrome) - continue; + if (!best) { + pr_err("raid6: Yikes! No algorithm found!\n"); + goto out; + } - perf = 0; + raid6_call = *best; - preempt_disable(); - j0 = jiffies; - while ((j1 = jiffies) == j0) - cpu_relax(); - while (time_before(jiffies, - j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { - (*algo)->xor_syndrome(disks, start, stop, - PAGE_SIZE, *dptrs); - perf++; - } - preempt_enable(); - - if (best == *algo) - bestxorperf = perf; + if (!IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) { + pr_info("raid6: skipped pq benchmark and selected %s\n", + best->name); + goto out; + } - pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name, - (perf * HZ * (disks-2)) >> - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); + pr_info("raid6: using algorithm %s gen() %ld MB/s\n", + best->name, + (bestgenperf * HZ * (disks - 2)) >> + (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); + + if (best->xor_syndrome) { + perf = 0; + + preempt_disable(); + j0 = jiffies; + while ((j1 = jiffies) == j0) + cpu_relax(); + while (time_before(jiffies, + j1 + (1 << RAID6_TIME_JIFFIES_LG2))) { + best->xor_syndrome(disks, start, stop, + PAGE_SIZE, *dptrs); + perf++; } - } + preempt_enable(); - if (best) { - if (IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) { - pr_info("raid6: using algorithm %s gen() %ld MB/s\n", - best->name, - (bestgenperf * HZ * (disks-2)) >> - (20 - PAGE_SHIFT+RAID6_TIME_JIFFIES_LG2)); - if (best->xor_syndrome) - pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", - (bestxorperf * HZ * (disks-2)) >> - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); - } else - pr_info("raid6: skip pq benchmark and using algorithm %s\n", - best->name); - raid6_call = *best; - } else - pr_err("raid6: Yikes! No algorithm found!\n"); + pr_info("raid6: .... 
xor() %ld MB/s, rmw enabled\n", + (perf * HZ * (disks - 2)) >> + (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); + } +out: return best; } -- cgit From 36dacddbf0bdba86cd00f066b4d724157eeb63f1 Mon Sep 17 00:00:00 2001 From: Dirk Müller Date: Wed, 5 Jan 2022 17:38:47 +0100 Subject: lib/raid6: Use strict priority ranking for pq gen() benchmarking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On x86_64, currently 3 variants of AVX512, 3 variants of AVX2 and 3 variants of SSE2 are benchmarked on initialization, taking between 144-153 jiffies. Testing across a hardware pool of various generations of Intel CPUs I could not find a single case where SSE2 won over AVX2 or AVX512. There are cases where AVX2 wins over AVX512 however. Change "prefer" into an integer priority field (similar to how recov selection works) to have more than one ranking level available, which is backwards compatible with existing behavior. Give AVX2/512 variants higher priority over SSE2 in order to skip SSE testing when AVX is available. In an AVX2/x86_64/HZ=250 case this saves in the order of 200ms of initialization time. Signed-off-by: Dirk Müller Acked-by: Paul Menzel Signed-off-by: Song Liu --- lib/raid6/algos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/raid6/algos.c') diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 9b7e8a837b27..39b74221f4a7 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -151,7 +151,7 @@ static inline const struct raid6_calls *raid6_choose_gen( const struct raid6_calls *best; for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { - if (!best || (*algo)->prefer >= best->prefer) { + if (!best || (*algo)->priority >= best->priority) { if ((*algo)->valid && !(*algo)->valid()) continue; -- cgit