@@ -82,6 +82,8 @@ class AArch64InstructionSelector : public InstructionSelector {
82
82
unsigned emitConstantPoolEntry (Constant *CPVal, MachineFunction &MF) const ;
83
83
MachineInstr *emitLoadFromConstantPool (Constant *CPVal,
84
84
MachineIRBuilder &MIRBuilder) const ;
85
+ MachineInstr *emitVectorConcat (unsigned Op1, unsigned Op2,
86
+ MachineIRBuilder &MIRBuilder) const ;
85
87
86
88
ComplexRendererFns selectArithImmed (MachineOperand &Root) const ;
87
89
@@ -1965,6 +1967,98 @@ MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
1965
1967
return &*Load;
1966
1968
}
1967
1969
1970
+ // / Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
1971
+ // / size and RB.
1972
+ static std::pair<unsigned , unsigned >
1973
+ getInsertVecEltOpInfo (const RegisterBank &RB, unsigned EltSize) {
1974
+ unsigned Opc, SubregIdx;
1975
+ if (RB.getID () == AArch64::GPRRegBankID) {
1976
+ if (EltSize == 32 ) {
1977
+ Opc = AArch64::INSvi32gpr;
1978
+ SubregIdx = AArch64::ssub;
1979
+ } else if (EltSize == 64 ) {
1980
+ Opc = AArch64::INSvi64gpr;
1981
+ SubregIdx = AArch64::dsub;
1982
+ } else {
1983
+ llvm_unreachable (" invalid elt size!" );
1984
+ }
1985
+ } else {
1986
+ if (EltSize == 8 ) {
1987
+ Opc = AArch64::INSvi8lane;
1988
+ SubregIdx = AArch64::bsub;
1989
+ } else if (EltSize == 16 ) {
1990
+ Opc = AArch64::INSvi16lane;
1991
+ SubregIdx = AArch64::hsub;
1992
+ } else if (EltSize == 32 ) {
1993
+ Opc = AArch64::INSvi32lane;
1994
+ SubregIdx = AArch64::ssub;
1995
+ } else if (EltSize == 64 ) {
1996
+ Opc = AArch64::INSvi64lane;
1997
+ SubregIdx = AArch64::dsub;
1998
+ } else {
1999
+ llvm_unreachable (" invalid elt size!" );
2000
+ }
2001
+ }
2002
+ return std::make_pair (Opc, SubregIdx);
2003
+ }
2004
+
2005
+ MachineInstr *AArch64InstructionSelector::emitVectorConcat (
2006
+ unsigned Op1, unsigned Op2, MachineIRBuilder &MIRBuilder) const {
2007
+ // We implement a vector concat by:
2008
+ // 1. Use scalar_to_vector to insert the lower vector into the larger dest
2009
+ // 2. Insert the upper vector into the destination's upper element
2010
+ // TODO: some of this code is common with G_BUILD_VECTOR handling.
2011
+ MachineRegisterInfo &MRI = MIRBuilder.getMF ().getRegInfo ();
2012
+
2013
+ const LLT Op1Ty = MRI.getType (Op1);
2014
+ const LLT Op2Ty = MRI.getType (Op2);
2015
+
2016
+ if (Op1Ty != Op2Ty) {
2017
+ LLVM_DEBUG (dbgs () << " Could not do vector concat of differing vector tys" );
2018
+ return nullptr ;
2019
+ }
2020
+ assert (Op1Ty.isVector () && " Expected a vector for vector concat" );
2021
+
2022
+ if (Op1Ty.getSizeInBits () >= 128 ) {
2023
+ LLVM_DEBUG (dbgs () << " Vector concat not supported for full size vectors" );
2024
+ return nullptr ;
2025
+ }
2026
+
2027
+ // At the moment we just support 64 bit vector concats.
2028
+ if (Op1Ty.getSizeInBits () != 64 ) {
2029
+ LLVM_DEBUG (dbgs () << " Vector concat supported for 64b vectors" );
2030
+ return nullptr ;
2031
+ }
2032
+
2033
+ const LLT ScalarTy = LLT::scalar (Op1Ty.getSizeInBits ());
2034
+ const LLT &DstTy = LLT::vector (2 , ScalarTy);
2035
+ const RegisterBank &FPRBank = *RBI.getRegBank (Op1, MRI, TRI);
2036
+ const TargetRegisterClass *DstRC =
2037
+ getMinClassForRegBank (FPRBank, Op1Ty.getSizeInBits () * 2 );
2038
+
2039
+ MachineInstr *WidenedOp1 = emitScalarToVector (DstTy, DstRC, Op1, MIRBuilder);
2040
+ MachineInstr *WidenedOp2 = emitScalarToVector (DstTy, DstRC, Op2, MIRBuilder);
2041
+ if (!WidenedOp1 || !WidenedOp2) {
2042
+ LLVM_DEBUG (dbgs () << " Could not emit a vector from scalar value" );
2043
+ return nullptr ;
2044
+ }
2045
+
2046
+ // Now do the insert of the upper element.
2047
+ unsigned InsertOpc, InsSubRegIdx;
2048
+ std::tie (InsertOpc, InsSubRegIdx) =
2049
+ getInsertVecEltOpInfo (FPRBank, ScalarTy.getSizeInBits ());
2050
+
2051
+ auto InsElt =
2052
+ MIRBuilder
2053
+ .buildInstr (InsertOpc, {DstRC}, {WidenedOp1->getOperand (0 ).getReg ()})
2054
+ .addImm (1 ) /* Lane index */
2055
+ .addUse (WidenedOp2->getOperand (0 ).getReg ())
2056
+ .addImm (0 );
2057
+
2058
+ constrainSelectedInstRegOperands (*InsElt, TII, TRI, RBI);
2059
+ return &*InsElt;
2060
+ }
2061
+
1968
2062
bool AArch64InstructionSelector::selectShuffleVector (
1969
2063
MachineInstr &I, MachineRegisterInfo &MRI) const {
1970
2064
const LLT DstTy = MRI.getType (I.getOperand (0 ).getReg ());
@@ -2002,21 +2096,37 @@ bool AArch64InstructionSelector::selectShuffleVector(
2002
2096
}
2003
2097
}
2004
2098
2005
- if (DstTy.getSizeInBits () != 128 ) {
2006
- assert (DstTy.getSizeInBits () == 64 && " Unexpected shuffle result ty" );
2007
- // This case can be done with TBL1.
2008
- return false ;
2009
- }
2099
+ MachineIRBuilder MIRBuilder (I);
2010
2100
2011
2101
// Use a constant pool to load the index vector for TBL.
2012
2102
Constant *CPVal = ConstantVector::get (CstIdxs);
2013
- MachineIRBuilder MIRBuilder (I);
2014
2103
MachineInstr *IndexLoad = emitLoadFromConstantPool (CPVal, MIRBuilder);
2015
2104
if (!IndexLoad) {
2016
2105
LLVM_DEBUG (dbgs () << " Could not load from a constant pool" );
2017
2106
return false ;
2018
2107
}
2019
2108
2109
+ if (DstTy.getSizeInBits () != 128 ) {
2110
+ assert (DstTy.getSizeInBits () == 64 && " Unexpected shuffle result ty" );
2111
+ // This case can be done with TBL1.
2112
+ MachineInstr *Concat = emitVectorConcat (Src1Reg, Src2Reg, MIRBuilder);
2113
+ if (!Concat) {
2114
+ LLVM_DEBUG (dbgs () << " Could not do vector concat for tbl1" );
2115
+ return false ;
2116
+ }
2117
+ auto TBL1 = MIRBuilder.buildInstr (
2118
+ AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
2119
+ {Concat->getOperand (0 ).getReg (), IndexLoad->getOperand (0 ).getReg ()});
2120
+ constrainSelectedInstRegOperands (*TBL1, TII, TRI, RBI);
2121
+
2122
+ auto Copy = BuildMI (*I.getParent (), I, I.getDebugLoc (),
2123
+ TII.get (TargetOpcode::COPY), I.getOperand (0 ).getReg ())
2124
+ .addUse (TBL1->getOperand (0 ).getReg (), 0 , AArch64::dsub);
2125
+ RBI.constrainGenericRegister (Copy.getReg (0 ), AArch64::FPR64RegClass, MRI);
2126
+ I.eraseFromParent ();
2127
+ return true ;
2128
+ }
2129
+
2020
2130
// For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
2021
2131
// Q registers for regalloc.
2022
2132
auto RegSeq = MIRBuilder
@@ -2048,26 +2158,8 @@ bool AArch64InstructionSelector::selectBuildVector(
2048
2158
const RegisterBank &RB = *RBI.getRegBank (I.getOperand (1 ).getReg (), MRI, TRI);
2049
2159
unsigned Opc;
2050
2160
unsigned SubregIdx;
2051
- if (RB.getID () == AArch64::GPRRegBankID) {
2052
- if (EltSize == 32 ) {
2053
- Opc = AArch64::INSvi32gpr;
2054
- SubregIdx = AArch64::ssub;
2055
- } else {
2056
- Opc = AArch64::INSvi64gpr;
2057
- SubregIdx = AArch64::dsub;
2058
- }
2059
- } else {
2060
- if (EltSize == 16 ) {
2061
- Opc = AArch64::INSvi16lane;
2062
- SubregIdx = AArch64::hsub;
2063
- } else if (EltSize == 32 ) {
2064
- Opc = AArch64::INSvi32lane;
2065
- SubregIdx = AArch64::ssub;
2066
- } else {
2067
- Opc = AArch64::INSvi64lane;
2068
- SubregIdx = AArch64::dsub;
2069
- }
2070
- }
2161
+
2162
+ std::tie (Opc, SubregIdx) = getInsertVecEltOpInfo (RB, EltSize);
2071
2163
2072
2164
MachineIRBuilder MIRBuilder (I);
2073
2165
0 commit comments