Skip to content

Commit 03b216d

Browse files
committed
[X86] Enable inline memcmp() to use AVX512
llvm-svn: 373706
1 parent e4758a5 commit 03b216d

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3394,9 +3394,8 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
   if (IsZeroCmp) {
     // Only enable vector loads for equality comparison. Right now the vector
     // version is not as fast for three way compare (see #33329).
-    // TODO: enable AVX512 when the DAG is ready.
-    // if (ST->hasAVX512()) Options.LoadSizes.push_back(64);
     const unsigned PreferredWidth = ST->getPreferVectorWidth();
+    if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64);
     if (PreferredWidth >= 256 && ST->hasAVX2()) Options.LoadSizes.push_back(32);
     if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16);
     // All GPR and vector loads can be unaligned. SIMD compare requires integer

llvm/test/CodeGen/X86/memcmp.ll

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512F --check-prefix=X64-AVX512BW
 
 ; This tests codegen time inlining/optimization of memcmp
 ; rdar://6480398
@@ -1540,6 +1542,15 @@ define i1 @length64_eq(i8* %x, i8* %y) nounwind {
 ; X64-AVX2-NEXT:    setne %al
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512F-LABEL: length64_eq:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT:    vpcmpeqd (%rsi), %zmm0, %k0
+; X64-AVX512F-NEXT:    kortestw %k0, %k0
+; X64-AVX512F-NEXT:    setae %al
+; X64-AVX512F-NEXT:    vzeroupper
+; X64-AVX512F-NEXT:    retq
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 64) nounwind
   %cmp = icmp ne i32 %call, 0
   ret i1 %cmp
@@ -1592,6 +1603,15 @@ define i1 @length64_eq_const(i8* %X) nounwind {
 ; X64-AVX2-NEXT:    sete %al
 ; X64-AVX2-NEXT:    vzeroupper
 ; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512F-LABEL: length64_eq_const:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0
+; X64-AVX512F-NEXT:    vpcmpeqd {{.*}}(%rip), %zmm0, %k0
+; X64-AVX512F-NEXT:    kortestw %k0, %k0
+; X64-AVX512F-NEXT:    setb %al
+; X64-AVX512F-NEXT:    vzeroupper
+; X64-AVX512F-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
   %c = icmp eq i32 %m, 0
   ret i1 %c

0 commit comments

Comments (0)