16
16
#include " llvm/Analysis/ValueTracking.h"
17
17
#include " llvm/IR/GlobalValue.h"
18
18
#include " llvm/IR/Instructions.h"
19
+ #include " llvm/Support/AtomicOrdering.h"
19
20
#include " llvm/Support/CommandLine.h"
20
21
#include " llvm/Support/Debug.h"
21
22
#include " llvm/Support/ErrorHandling.h"
@@ -81,10 +82,12 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
81
82
82
83
switch (N->getOpcode ()) {
83
84
case ISD::LOAD:
85
+ case ISD::ATOMIC_LOAD:
84
86
if (tryLoad (N))
85
87
return ;
86
88
break ;
87
89
case ISD::STORE:
90
+ case ISD::ATOMIC_STORE:
88
91
if (tryStore (N))
89
92
return ;
90
93
break ;
@@ -834,17 +837,27 @@ static Optional<unsigned> pickOpcodeForVT(
834
837
835
838
bool NVPTXDAGToDAGISel::tryLoad (SDNode *N) {
836
839
SDLoc dl (N);
837
- LoadSDNode *LD = cast<LoadSDNode>(N);
840
+ MemSDNode *LD = cast<MemSDNode>(N);
841
+ assert (LD->readMem () && " Expected load" );
842
+ LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
838
843
EVT LoadedVT = LD->getMemoryVT ();
839
844
SDNode *NVPTXLD = nullptr ;
840
845
841
846
// do not support pre/post inc/dec
842
- if (LD ->isIndexed ())
847
+ if (PlainLoad && PlainLoad ->isIndexed ())
843
848
return false ;
844
849
845
850
if (!LoadedVT.isSimple ())
846
851
return false ;
847
852
853
+ AtomicOrdering Ordering = LD->getOrdering ();
854
+ // In order to lower atomic loads with stronger guarantees we would need to
855
+ // use load.acquire or insert fences. However these features were only added
856
+ // with PTX ISA 6.0 / sm_70.
857
+ // TODO: Check if we can actually use the new instructions and implement them.
858
+ if (isStrongerThanMonotonic (Ordering))
859
+ return false ;
860
+
848
861
// Address Space Setting
849
862
unsigned int CodeAddrSpace = getCodeAddrSpace (LD);
850
863
if (canLowerToLDG (LD, *Subtarget, CodeAddrSpace, MF)) {
@@ -855,8 +868,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
855
868
CurDAG->getDataLayout ().getPointerSizeInBits (LD->getAddressSpace ());
856
869
857
870
// Volatile Setting
858
- // - .volatile is only availalble for .global and .shared
859
- bool isVolatile = LD->isVolatile ();
871
+ // - .volatile is only available for .global and .shared
872
+ // - .volatile has the same memory synchronization semantics as .relaxed.sys
873
+ bool isVolatile = LD->isVolatile () || Ordering == AtomicOrdering::Monotonic;
860
874
if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
861
875
CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
862
876
CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
@@ -882,7 +896,7 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
882
896
fromTypeWidth = 32 ;
883
897
}
884
898
885
- if ((LD ->getExtensionType () == ISD::SEXTLOAD))
899
+ if (PlainLoad && (PlainLoad ->getExtensionType () == ISD::SEXTLOAD))
886
900
fromType = NVPTX::PTXLdStInstCode::Signed;
887
901
else if (ScalarVT.isFloatingPoint ())
888
902
// f16 uses .b16 as its storage type.
@@ -1691,25 +1705,38 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1691
1705
1692
1706
bool NVPTXDAGToDAGISel::tryStore (SDNode *N) {
1693
1707
SDLoc dl (N);
1694
- StoreSDNode *ST = cast<StoreSDNode>(N);
1708
+ MemSDNode *ST = cast<MemSDNode>(N);
1709
+ assert (ST->writeMem () && " Expected store" );
1710
+ StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
1711
+ AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
1712
+ assert ((PlainStore || AtomicStore) && " Expected store" );
1695
1713
EVT StoreVT = ST->getMemoryVT ();
1696
1714
SDNode *NVPTXST = nullptr ;
1697
1715
1698
1716
// do not support pre/post inc/dec
1699
- if (ST ->isIndexed ())
1717
+ if (PlainStore && PlainStore ->isIndexed ())
1700
1718
return false ;
1701
1719
1702
1720
if (!StoreVT.isSimple ())
1703
1721
return false ;
1704
1722
1723
+ AtomicOrdering Ordering = ST->getOrdering ();
1724
+ // In order to lower atomic stores with stronger guarantees we would need to
1725
+ // use store.release or insert fences. However these features were only added
1726
+ // with PTX ISA 6.0 / sm_70.
1727
+ // TODO: Check if we can actually use the new instructions and implement them.
1728
+ if (isStrongerThanMonotonic (Ordering))
1729
+ return false ;
1730
+
1705
1731
// Address Space Setting
1706
1732
unsigned int CodeAddrSpace = getCodeAddrSpace (ST);
1707
1733
unsigned int PointerSize =
1708
1734
CurDAG->getDataLayout ().getPointerSizeInBits (ST->getAddressSpace ());
1709
1735
1710
1736
// Volatile Setting
1711
- // - .volatile is only availalble for .global and .shared
1712
- bool isVolatile = ST->isVolatile ();
1737
+ // - .volatile is only available for .global and .shared
1738
+ // - .volatile has the same memory synchronization semantics as .relaxed.sys
1739
+ bool isVolatile = ST->isVolatile () || Ordering == AtomicOrdering::Monotonic;
1713
1740
if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1714
1741
CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1715
1742
CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
@@ -1739,41 +1766,53 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
1739
1766
toType = NVPTX::PTXLdStInstCode::Unsigned;
1740
1767
1741
1768
// Create the machine instruction DAG
1742
- SDValue Chain = N-> getOperand ( 0 );
1743
- SDValue N1 = N-> getOperand ( 1 );
1744
- SDValue N2 = N-> getOperand ( 2 );
1769
+ SDValue Chain = ST-> getChain ( );
1770
+ SDValue Value = PlainStore ? PlainStore-> getValue () : AtomicStore-> getVal ( );
1771
+ SDValue BasePtr = ST-> getBasePtr ( );
1745
1772
SDValue Addr;
1746
1773
SDValue Offset, Base;
1747
1774
Optional<unsigned > Opcode;
1748
- MVT::SimpleValueType SourceVT = N1.getNode ()->getSimpleValueType (0 ).SimpleTy ;
1775
+ MVT::SimpleValueType SourceVT =
1776
+ Value.getNode ()->getSimpleValueType (0 ).SimpleTy ;
1749
1777
1750
- if (SelectDirectAddr (N2 , Addr)) {
1778
+ if (SelectDirectAddr (BasePtr , Addr)) {
1751
1779
Opcode = pickOpcodeForVT (SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1752
1780
NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1753
1781
NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1754
1782
NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1755
1783
if (!Opcode)
1756
1784
return false ;
1757
- SDValue Ops[] = { N1, getI32Imm (isVolatile, dl),
1758
- getI32Imm (CodeAddrSpace, dl), getI32Imm (vecType, dl),
1759
- getI32Imm (toType, dl), getI32Imm (toTypeWidth, dl), Addr,
1760
- Chain };
1785
+ SDValue Ops[] = {Value,
1786
+ getI32Imm (isVolatile, dl),
1787
+ getI32Imm (CodeAddrSpace, dl),
1788
+ getI32Imm (vecType, dl),
1789
+ getI32Imm (toType, dl),
1790
+ getI32Imm (toTypeWidth, dl),
1791
+ Addr,
1792
+ Chain};
1761
1793
NVPTXST = CurDAG->getMachineNode (Opcode.getValue (), dl, MVT::Other, Ops);
1762
- } else if (PointerSize == 64 ? SelectADDRsi64 (N2.getNode (), N2, Base, Offset)
1763
- : SelectADDRsi (N2.getNode (), N2, Base, Offset)) {
1794
+ } else if (PointerSize == 64
1795
+ ? SelectADDRsi64 (BasePtr.getNode (), BasePtr, Base, Offset)
1796
+ : SelectADDRsi (BasePtr.getNode (), BasePtr, Base, Offset)) {
1764
1797
Opcode = pickOpcodeForVT (SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1765
1798
NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1766
1799
NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1767
1800
NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1768
1801
if (!Opcode)
1769
1802
return false ;
1770
- SDValue Ops[] = { N1, getI32Imm (isVolatile, dl),
1771
- getI32Imm (CodeAddrSpace, dl), getI32Imm (vecType, dl),
1772
- getI32Imm (toType, dl), getI32Imm (toTypeWidth, dl), Base,
1773
- Offset, Chain };
1803
+ SDValue Ops[] = {Value,
1804
+ getI32Imm (isVolatile, dl),
1805
+ getI32Imm (CodeAddrSpace, dl),
1806
+ getI32Imm (vecType, dl),
1807
+ getI32Imm (toType, dl),
1808
+ getI32Imm (toTypeWidth, dl),
1809
+ Base,
1810
+ Offset,
1811
+ Chain};
1774
1812
NVPTXST = CurDAG->getMachineNode (Opcode.getValue (), dl, MVT::Other, Ops);
1775
- } else if (PointerSize == 64 ? SelectADDRri64 (N2.getNode (), N2, Base, Offset)
1776
- : SelectADDRri (N2.getNode (), N2, Base, Offset)) {
1813
+ } else if (PointerSize == 64
1814
+ ? SelectADDRri64 (BasePtr.getNode (), BasePtr, Base, Offset)
1815
+ : SelectADDRri (BasePtr.getNode (), BasePtr, Base, Offset)) {
1777
1816
if (PointerSize == 64 )
1778
1817
Opcode = pickOpcodeForVT (
1779
1818
SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
@@ -1787,10 +1826,15 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
1787
1826
if (!Opcode)
1788
1827
return false ;
1789
1828
1790
- SDValue Ops[] = { N1, getI32Imm (isVolatile, dl),
1791
- getI32Imm (CodeAddrSpace, dl), getI32Imm (vecType, dl),
1792
- getI32Imm (toType, dl), getI32Imm (toTypeWidth, dl), Base,
1793
- Offset, Chain };
1829
+ SDValue Ops[] = {Value,
1830
+ getI32Imm (isVolatile, dl),
1831
+ getI32Imm (CodeAddrSpace, dl),
1832
+ getI32Imm (vecType, dl),
1833
+ getI32Imm (toType, dl),
1834
+ getI32Imm (toTypeWidth, dl),
1835
+ Base,
1836
+ Offset,
1837
+ Chain};
1794
1838
NVPTXST = CurDAG->getMachineNode (Opcode.getValue (), dl, MVT::Other, Ops);
1795
1839
} else {
1796
1840
if (PointerSize == 64 )
@@ -1806,10 +1850,14 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
1806
1850
NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1807
1851
if (!Opcode)
1808
1852
return false ;
1809
- SDValue Ops[] = { N1, getI32Imm (isVolatile, dl),
1810
- getI32Imm (CodeAddrSpace, dl), getI32Imm (vecType, dl),
1811
- getI32Imm (toType, dl), getI32Imm (toTypeWidth, dl), N2,
1812
- Chain };
1853
+ SDValue Ops[] = {Value,
1854
+ getI32Imm (isVolatile, dl),
1855
+ getI32Imm (CodeAddrSpace, dl),
1856
+ getI32Imm (vecType, dl),
1857
+ getI32Imm (toType, dl),
1858
+ getI32Imm (toTypeWidth, dl),
1859
+ BasePtr,
1860
+ Chain};
1813
1861
NVPTXST = CurDAG->getMachineNode (Opcode.getValue (), dl, MVT::Other, Ops);
1814
1862
}
1815
1863
0 commit comments