Commit f5fe785

Revert r329956, "AArch64: Introduce a DAG combine for folding offsets into addresses."
Caused a hang and eventually an assertion failure in LTO builds of 7zip-benchmark on aarch64 iOS targets. http://green.lab.llvm.org/green/job/lnt-ctmark-aarch64-O3-flto/2024/

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@330063 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent c6388fd

8 files changed: +83 -240 lines changed

lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 8 additions & 10 deletions
@@ -743,16 +743,14 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
     if (!GAN)
       return true;
 
-    if (GAN->getOffset() % Size == 0) {
-      const GlobalValue *GV = GAN->getGlobal();
-      unsigned Alignment = GV->getAlignment();
-      Type *Ty = GV->getValueType();
-      if (Alignment == 0 && Ty->isSized())
-        Alignment = DL.getABITypeAlignment(Ty);
-
-      if (Alignment >= Size)
-        return true;
-    }
+    const GlobalValue *GV = GAN->getGlobal();
+    unsigned Alignment = GV->getAlignment();
+    Type *Ty = GV->getValueType();
+    if (Alignment == 0 && Ty->isSized())
+      Alignment = DL.getABITypeAlignment(Ty);
+
+    if (Alignment >= Size)
+      return true;
   }
 
   if (CurDAG->isBaseWithConstantOffset(N)) {
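For context on the restored check: a scaled unsigned-immediate access such as ldr x0, [xN, :lo12:sym] encodes the low 12 bits of the symbol's address divided by the access size, so those low bits must be a multiple of Size; requiring the global's alignment to be at least Size guarantees that. A minimal standalone sketch of that decision, outside SelectionDAG (the helper name and its parameters are illustrative, not LLVM API):

#include <cstdint>

// Illustrative only: mirrors the alignment test restored above, outside
// SelectionDAG. If the symbol is aligned to at least the access size, the
// :lo12: bits it contributes are a multiple of the size and fit the scaled
// immediate field of a load/store.
static bool canFoldLo12IntoScaledImmediate(uint64_t SymbolAlign,
                                           uint64_t AccessSize) {
  return SymbolAlign >= AccessSize;
}

int main() {
  // 8-byte load from an 8-byte-aligned global: foldable.
  bool Ok8 = canFoldLo12IntoScaledImmediate(8, 8);
  // 16-byte (q-register) load from the same global: not provably foldable.
  bool Ok16 = canFoldLo12IntoScaledImmediate(8, 16);
  return (Ok8 && !Ok16) ? 0 : 1;
}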

lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 7 additions & 58 deletions
@@ -577,8 +577,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
   setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
 
-  setTargetDAGCombine(ISD::GlobalAddress);
-
   MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
   MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
@@ -3679,8 +3677,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
 SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
                                              SelectionDAG &DAG,
                                              unsigned Flag) const {
-  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
-                                    N->getOffset(), Flag);
+  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
 }
 
 SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
@@ -3755,9 +3752,8 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
   unsigned char OpFlags =
       Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
 
-  if (OpFlags != AArch64II::MO_NO_FLAG)
-    assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
-           "unexpected offset in global node");
+  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
+         "unexpected offset in global node");
 
   // This also catches the large code model case for Darwin.
   if ((OpFlags & AArch64II::MO_GOT) != 0) {
@@ -4995,8 +4991,10 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
 
 bool AArch64TargetLowering::isOffsetFoldingLegal(
     const GlobalAddressSDNode *GA) const {
-  // Offsets are folded in the DAG combine rather than here so that we can
-  // intelligently choose an offset based on the uses.
+  DEBUG(dbgs() << "Skipping offset folding global address: ");
+  DEBUG(GA->dump());
+  DEBUG(dbgs() << "AArch64 doesn't support folding offsets into global "
+               "addresses\n");
   return false;
 }
 
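isOffsetFoldingLegal is the generic hook the DAG combiner consults before rewriting an add of a global address and a constant into a global-address node that carries the offset; with the combine reverted, AArch64 returns to answering no unconditionally, leaving the offset as an explicit ADD. A minimal sketch of the fold this hook gates, using standalone toy types rather than the real SelectionDAG classes:

#include <cstdint>
#include <optional>
#include <string>

// Toy stand-in for a global-address node: a symbol plus a folded byte offset.
struct GlobalAddr {
  std::string Symbol;
  int64_t Offset;
};

// Target hook: returning false (as the AArch64 override above does) keeps
// "address + constant" as a separate ADD instead of folding it in.
static bool isOffsetFoldingLegal(const GlobalAddr &) { return false; }

// The gated fold: (GlobalAddr{Sym, Off} + C) -> GlobalAddr{Sym, Off + C}.
static std::optional<GlobalAddr> tryFoldOffset(const GlobalAddr &GA, int64_t C) {
  if (!isOffsetFoldingLegal(GA))
    return std::nullopt;
  return GlobalAddr{GA.Symbol, GA.Offset + C};
}

int main() {
  // With the hook returning false, no offset is ever folded.
  return tryFoldOffset({"object", 0}, 8).has_value() ? 1 : 0;
}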

@@ -10619,53 +10617,6 @@ static SDValue performNVCASTCombine(SDNode *N) {
   return SDValue();
 }
 
-// If all users of the globaladdr are of the form (globaladdr + constant), find
-// the smallest constant, fold it into the globaladdr's offset and rewrite the
-// globaladdr as (globaladdr + constant) - constant.
-static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
-                                           const AArch64Subtarget *Subtarget,
-                                           const TargetMachine &TM) {
-  auto *GN = dyn_cast<GlobalAddressSDNode>(N);
-  if (!GN || Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
-                 AArch64II::MO_NO_FLAG)
-    return SDValue();
-
-  uint64_t MinOffset = -1ull;
-  for (SDNode *N : GN->uses()) {
-    if (N->getOpcode() != ISD::ADD)
-      return SDValue();
-    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
-    if (!C)
-      C = dyn_cast<ConstantSDNode>(N->getOperand(1));
-    if (!C)
-      return SDValue();
-    MinOffset = std::min(MinOffset, C->getZExtValue());
-  }
-  uint64_t Offset = MinOffset + GN->getOffset();
-
-  // Check whether folding this offset is legal. It must not go out of bounds of
-  // the referenced object to avoid violating the code model, and must be
-  // smaller than 2^21 because this is the largest offset expressible in all
-  // object formats.
-  //
-  // This check also prevents us from folding negative offsets, which will end
-  // up being treated in the same way as large positive ones. They could also
-  // cause code model violations, and aren't really common enough to matter.
-  if (Offset >= (1 << 21))
-    return SDValue();
-
-  const GlobalValue *GV = GN->getGlobal();
-  Type *T = GV->getValueType();
-  if (!T->isSized() ||
-      Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
-    return SDValue();
-
-  SDLoc DL(GN);
-  SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
-  return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
-                     DAG.getConstant(MinOffset, DL, MVT::i64));
-}
-
 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
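The strategy of the removed combine is easiest to read in isolation: every use of the global address must be an ADD of a constant; the smallest such constant is folded into the address, and each use is then rewritten relative to the folded address, provided the folded offset stays below 2^21 and inside the referenced object. A minimal standalone sketch of that offset choice with plain integers instead of SDNodes (names and the simplified bounds check are illustrative):

#include <algorithm>
#include <cstdint>
#include <optional>
#include <vector>

// Toy model of the reverted combine's offset selection. UseOffsets holds the
// constant added to the global address at each use site; ObjectSize is the
// alloc size of the referenced global.
static std::optional<uint64_t>
chooseFoldableOffset(const std::vector<uint64_t> &UseOffsets,
                     uint64_t ObjectSize) {
  if (UseOffsets.empty())
    return std::nullopt;
  // Fold the smallest constant so every use can still be expressed as the
  // folded address plus a non-negative leftover constant.
  uint64_t MinOffset =
      *std::min_element(UseOffsets.begin(), UseOffsets.end());
  // Mirror the removed legality checks: the folded offset must be expressible
  // in all object formats (< 2^21) and must not step outside the object,
  // which also rejects negative offsets seen as huge unsigned values.
  if (MinOffset >= (1ull << 21) || MinOffset > ObjectSize)
    return std::nullopt;
  return MinOffset;
}

int main() {
  // Uses at @g+8, @g+24 and @g+16 on a 64-byte object: fold +8 into the
  // address and rewrite the uses as +0, +16 and +8 past the folded address.
  auto Off = chooseFoldableOffset({8, 24, 16}, 64);
  return (Off && *Off == 8) ? 0 : 1;
}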
@@ -10753,8 +10704,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     default:
       break;
     }
-  case ISD::GlobalAddress:
-    return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
   }
   return SDValue();
 }

test/CodeGen/AArch64/arm64-addrmode.ll

Lines changed: 16 additions & 15 deletions
@@ -5,42 +5,43 @@
 
 ; base + offset (imm9)
 ; CHECK: @t1
-; CHECK: ldr xzr, [x0, #8]
+; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
 ; CHECK: ret
-define void @t1(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 1
+define void @t1() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 1
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
 ; base + offset (> imm9)
 ; CHECK: @t2
-; CHECK: sub [[ADDREG:x[0-9]+]], x0, #264
+; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264
 ; CHECK: ldr xzr, [
+; CHECK: [[ADDREG]]]
 ; CHECK: ret
-define void @t2(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 -33
+define void @t2() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 -33
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
 ; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes)
 ; CHECK: @t3
-; CHECK: ldr xzr, [x0, #32760]
+; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
 ; CHECK: ret
-define void @t3(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4095
+define void @t3() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4095
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
 ; base + unsigned offset (> imm12 * size of type in bytes)
 ; CHECK: @t4
 ; CHECK: orr w[[NUM:[0-9]+]], wzr, #0x8000
-; CHECK: ldr xzr, [x0, x[[NUM]]]
+; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
-define void @t4(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4096
+define void @t4() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4096
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
@@ -57,12 +58,12 @@ define void @t5(i64 %a) {
 
 ; base + reg + imm
 ; CHECK: @t6
-; CHECK: add [[ADDREG:x[0-9]+]], x1, x0, lsl #3
+; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
 ; CHECK-NEXT: orr w[[NUM:[0-9]+]], wzr, #0x8000
 ; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
-define void @t6(i64 %a, i64* %object) {
-  %tmp1 = getelementptr inbounds i64, i64* %object, i64 %a
+define void @t6(i64 %a) {
+  %tmp1 = getelementptr inbounds i64, i64* @object, i64 %a
   %incdec.ptr = getelementptr inbounds i64, i64* %tmp1, i64 4096
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
