forked from beagleboard/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
lib/raid6: Add AVX2 optimized recovery functions
Optimize the RAID6 recovery functions to take advantage of the 256-bit YMM integer instructions introduced in AVX2. The patch was tested and benchmarked before submission; however, the hardware is not yet released, so benchmark numbers cannot be reported.

Acked-by: "H. Peter Anvin" <[email protected]>
Signed-off-by: Jim Kukunas <[email protected]>
Signed-off-by: NeilBrown <[email protected]>
- Loading branch information
Showing
7 changed files
with
345 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,327 @@ | ||
/* | ||
* Copyright (C) 2012 Intel Corporation | ||
* Author: Jim Kukunas <[email protected]> | ||
* | ||
* This program is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU General Public License | ||
* as published by the Free Software Foundation; version 2 | ||
* of the License. | ||
*/ | ||
|
||
#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) | ||
|
||
#if CONFIG_AS_AVX2 | ||
|
||
#include <linux/raid/pq.h> | ||
#include "x86.h" | ||
|
||
static int raid6_has_avx2(void) | ||
{ | ||
return boot_cpu_has(X86_FEATURE_AVX2) && | ||
boot_cpu_has(X86_FEATURE_AVX); | ||
} | ||
|
||
/*
 * Recover two failed data blocks from the P and Q redundancy blocks,
 * using AVX2 256-bit YMM arithmetic over GF(2^8).
 *
 * @disks:	total number of disks (data disks plus P and Q)
 * @bytes:	bytes per block; the x86_64 path consumes 64 bytes per
 *		loop iteration, the 32-bit path 32 bytes — assumes
 *		@bytes is a multiple of that stride (NOTE(review):
 *		caller contract, confirm against the raid6 core)
 * @faila:	index of the first failed data disk
 * @failb:	index of the second failed data disk
 * @ptrs:	per-disk block pointers; ptrs[disks-2] is P,
 *		ptrs[disks-1] is Q
 */
static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
		int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;	/* low-nibble mask, broadcast into ymm7 */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/* Compute syndrome with zero for the missing data pages
	   Use the dead data pages as temporary storage for
	   delta p and delta q */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	/* YMM state is clobbered below; take ownership of the FPU */
	kernel_fpu_begin();

	/* ymm7 = 32 copies of 0x0f, used to split bytes into nibbles
	   for the vpshufb table lookups */
	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		/* Two 32-byte lanes per iteration: lane 0 in the low
		   registers, lane 1 (offset 32) in ymm8+ */
		asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
		asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
		asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
		asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
		asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));

		/*
		 * 1 = dq[0]  ^ q[0]
		 * 9 = dq[32] ^ q[32]
		 * 0 = dp[0]  ^ p[0]
		 * 8 = dp[32] ^ p[32]
		 */

		/* qmul is a 16-byte low/high-nibble lookup table pair;
		   replicate each half into both 128-bit lanes */
		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));

		/* GF(2^8) multiply by table lookup: shuffle the low and
		   high nibbles separately, then xor the partial products */
		asm volatile("vpsraw $4, %ymm1, %ymm3");
		asm volatile("vpsraw $4, %ymm9, %ymm12");
		asm volatile("vpand %ymm7, %ymm1, %ymm1");
		asm volatile("vpand %ymm7, %ymm9, %ymm9");
		asm volatile("vpand %ymm7, %ymm3, %ymm3");
		asm volatile("vpand %ymm7, %ymm12, %ymm12");
		asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
		asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
		asm volatile("vpxor %ymm14, %ymm15, %ymm15");
		asm volatile("vpxor %ymm4, %ymm5, %ymm5");

		/*
		 * 5 = qx[0]
		 * 15 = qx[32]
		 */

		/* Same nibble-table multiply, with the pbmul table */
		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
		asm volatile("vpsraw $4, %ymm0, %ymm2");
		asm volatile("vpsraw $4, %ymm8, %ymm6");
		asm volatile("vpand %ymm7, %ymm0, %ymm3");
		asm volatile("vpand %ymm7, %ymm8, %ymm14");
		asm volatile("vpand %ymm7, %ymm2, %ymm2");
		asm volatile("vpand %ymm7, %ymm6, %ymm6");
		asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
		asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
		asm volatile("vpxor %ymm4, %ymm1, %ymm1");
		asm volatile("vpxor %ymm12, %ymm13, %ymm13");

		/*
		 * 1 = pbmul[px[0]]
		 * 13 = pbmul[px[32]]
		 */
		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
		asm volatile("vpxor %ymm15, %ymm13, %ymm13");

		/*
		 * 1 = db = DQ
		 * 13 = db[32] = DQ[32]
		 */
		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
		asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32]));
		/* DP = px ^ DQ */
		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
		asm volatile("vpxor %ymm13, %ymm8, %ymm8");

		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
		asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#else
		/* 32-bit variant: a single 32-byte lane per iteration
		   (fewer YMM registers are available) */
		asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
		asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));

		/* 1 = dq ^ q; 0 = dp ^ p */

		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));

		/*
		 * 1 = dq ^ q
		 * 3 = dq ^ p >> 4
		 */
		asm volatile("vpsraw $4, %ymm1, %ymm3");
		asm volatile("vpand %ymm7, %ymm1, %ymm1");
		asm volatile("vpand %ymm7, %ymm3, %ymm3");
		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
		asm volatile("vpxor %ymm4, %ymm5, %ymm5");

		/* 5 = qx */

		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));

		asm volatile("vpsraw $4, %ymm0, %ymm2");
		asm volatile("vpand %ymm7, %ymm0, %ymm3");
		asm volatile("vpand %ymm7, %ymm2, %ymm2");
		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
		asm volatile("vpxor %ymm4, %ymm1, %ymm1");

		/* 1 = pbmul[px] */
		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
		/* 1 = db = DQ */
		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));

		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));

		bytes -= 32;
		p += 32;
		q += 32;
		dp += 32;
		dq += 32;
#endif
	}

	kernel_fpu_end();
}
|
||
/*
 * Recover one failed data block plus the P block from the Q
 * redundancy block, using AVX2 256-bit YMM arithmetic over GF(2^8).
 *
 * @disks:	total number of disks (data disks plus P and Q)
 * @bytes:	bytes per block; the x86_64 path consumes 64 bytes per
 *		loop iteration, the 32-bit path 32 bytes — assumes
 *		@bytes is a multiple of that stride (NOTE(review):
 *		caller contract, confirm against the raid6 core)
 * @faila:	index of the failed data disk
 * @ptrs:	per-disk block pointers; ptrs[disks-2] is P,
 *		ptrs[disks-1] is Q
 */
static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
		void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;	/* low-nibble mask, broadcast into ymm7 */

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/* Compute syndrome with zero for the missing data page
	   Use the dead data page as temporary storage for delta q */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	/* YMM state is clobbered below; take ownership of the FPU */
	kernel_fpu_begin();

	/* ymm7 = 32 copies of 0x0f for nibble splitting */
	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		/* Two 32-byte lanes per iteration */
		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
		asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));

		/*
		 * 3 = q[0] ^ dq[0]
		 * 8 = q[32] ^ dq[32]
		 */
		/* Load the qmul nibble tables and duplicate them for the
		   second lane (NOTE(review): vmovapd here is just a full
		   256-bit register copy of integer data; vmovdqa would be
		   the integer-domain equivalent) */
		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
		asm volatile("vmovapd %ymm0, %ymm13");
		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
		asm volatile("vmovapd %ymm1, %ymm14");

		/* GF(2^8) multiply by table lookup: shuffle low and high
		   nibbles separately, then xor the partial products */
		asm volatile("vpsraw $4, %ymm3, %ymm6");
		asm volatile("vpsraw $4, %ymm8, %ymm12");
		asm volatile("vpand %ymm7, %ymm3, %ymm3");
		asm volatile("vpand %ymm7, %ymm8, %ymm8");
		asm volatile("vpand %ymm7, %ymm6, %ymm6");
		asm volatile("vpand %ymm7, %ymm12, %ymm12");
		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
		asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
		asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
		asm volatile("vpxor %ymm0, %ymm1, %ymm1");
		asm volatile("vpxor %ymm13, %ymm14, %ymm14");

		/*
		 * 1 = qmul[q[0] ^ dq[0]]
		 * 14 = qmul[q[32] ^ dq[32]]
		 */
		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
		asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
		asm volatile("vpxor %ymm1, %ymm2, %ymm2");
		asm volatile("vpxor %ymm14, %ymm12, %ymm12");

		/*
		 * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
		 * 12 = p[32] ^ qmul[q[32] ^ dq[32]]
		 */

		/* Write back the recovered data block and the new P */
		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
		asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
		asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#else
		/* 32-bit variant: a single 32-byte lane per iteration */
		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));

		/* 3 = q ^ dq */

		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));

		asm volatile("vpsraw $4, %ymm3, %ymm6");
		asm volatile("vpand %ymm7, %ymm3, %ymm3");
		asm volatile("vpand %ymm7, %ymm6, %ymm6");
		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
		asm volatile("vpxor %ymm0, %ymm1, %ymm1");

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
		asm volatile("vpxor %ymm1, %ymm2, %ymm2");

		/* 2 = p ^ qmul[q ^ dq] */

		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));

		bytes -= 32;
		p += 32;
		q += 32;
		dq += 32;
#endif
	}

	kernel_fpu_end();
}
|
||
const struct raid6_recov_calls raid6_recov_avx2 = { | ||
.data2 = raid6_2data_recov_avx2, | ||
.datap = raid6_datap_recov_avx2, | ||
.valid = raid6_has_avx2, | ||
#ifdef CONFIG_X86_64 | ||
.name = "avx2x2", | ||
#else | ||
.name = "avx2x1", | ||
#endif | ||
.priority = 2, | ||
}; | ||
|
||
#else | ||
#warning "your version of binutils lacks AVX2 support" | ||
#endif | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.