#include "llvm/IR/IntrinsicsAMDGPU.h"
#ifdef EXPENSIVE_CHECKS
#define DEBUG_TYPE "amdgpu-isel"

In = stripBitcast(In);
Out = In.getOperand(0);
if (ShiftAmt->getZExtValue() == 16) {
return In.getOperand(0);
if (Src.getValueType().getSizeInBits() == 32)
  return stripBitcast(Src);

"AMDGPU DAG->DAG Pattern Instruction Selection", false,
#ifdef EXPENSIVE_CHECKS
"AMDGPU DAG->DAG Pattern Instruction Selection", false,

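// True when the f16 result of the given opcode is known to write zeros into
// the high 16 bits of its 32-bit destination register.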
bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {

#ifdef EXPENSIVE_CHECKS
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
assert(L->isLCSSAForm(DT));
#ifdef EXPENSIVE_CHECKS

MVT VT = N->getValueType(0).getSimpleVT();
if (VT != MVT::v2i16 && VT != MVT::v2f16)
LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
if (LdLo && Lo.hasOneUse()) {

bool MadeChange = false;
switch (N->getOpcode()) {

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
return TII->isInlineConstant(C->getAPIntValue());
return TII->isInlineConstant(C->getValueAPF());

unsigned OpNo) const {
if (!N->isMachineOpcode()) {
Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
if (Reg.isVirtual()) {
return MRI.getRegClass(Reg);
return TRI->getPhysRegBaseClass(Reg);
switch (N->getMachineOpcode()) {
unsigned OpIdx = Desc.getNumDefs() + OpNo;
if (OpIdx >= Desc.getNumOperands())
int RegClass = Desc.operands()[OpIdx].RegClass;
case AMDGPU::REG_SEQUENCE: {
unsigned RCID = N->getConstantOperandVal(0);
SDValue SubRegOp = N->getOperand(OpNo + 1);

SmallVector<SDValue, 8> Ops;
for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
return glueCopyToOp(N, M0, M0.getValue(1));

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
AMDGPU::S_MOV_B32, DL, MVT::i32,
AMDGPU::S_MOV_B32, DL, MVT::i32,

EVT VT = N->getValueType(0);
if (NumVectorElts == 1) {
assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
bool IsRegSeq = true;
unsigned NOps = N->getNumOperands();
for (unsigned i = 0; i < NOps; i++) {
if (isa<RegisterSDNode>(N->getOperand(i))) {
RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
if (NOps != NumVectorElts) {
for (unsigned i = NOps; i < NumVectorElts; ++i) {
RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
RegSeqArgs[1 + (2 * i) + 1] =

unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
N = glueCopyToM0LDSInit(N);
if (N->getValueType(0) != MVT::i64)
SelectADD_SUB_I64(N);
if (N->getValueType(0) != MVT::i32)
SelectUADDO_USUBO(N);
SelectFMUL_W_CHAIN(N);
SelectFMA_W_CHAIN(N);
EVT VT = N->getValueType(0);
unsigned RegClassID =
if (N->getValueType(0) == MVT::i128) {
} else if (N->getValueType(0) == MVT::i64) {
const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                        N->getOperand(1), SubReg1 };
N->getValueType(0), Ops));
if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
Imm = C->getZExtValue();
return SelectMUL_LOHI(N);
if (N->getValueType(0) != MVT::i32)
if (N->getValueType(0) == MVT::i32) {
{ N->getOperand(0), N->getOperand(1) });
SelectINTRINSIC_W_CHAIN(N);
SelectINTRINSIC_WO_CHAIN(N);
SelectINTRINSIC_VOID(N);
SelectWAVE_ADDRESS(N);
SelectSTACKRESTORE(N);

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
return Term->getMetadata("amdgpu.uniform") ||
       Term->getMetadata("structurizecfg.uniform");

bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {
const APInt &RHS = N->getConstantOperandAPInt(1);
if (RHS.countr_one() >= ShAmtBits)
return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;

N1 = Lo.getOperand(1);
assert(LHS && RHS && isa<ConstantSDNode>(RHS));

return "AMDGPU DAG->DAG Pattern Instruction Selection";

#ifdef EXPENSIVE_CHECKS
for (auto &L : LI.getLoopsInPreorder())
  assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");

if ((C = dyn_cast<ConstantSDNode>(Addr))) {
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {

SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
AMDGPU::S_MOV_B32, DL, MVT::i32,

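// Expands a 64-bit ADD/SUB: split both operands into sub0/sub1 halves, add
// the low halves with a carry-out, add the high halves with the carry-in, and
// recombine the two results with a REG_SEQUENCE.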
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
unsigned Opcode = N->getOpcode();
DL, MVT::i32, LHS, Sub0);
DL, MVT::i32, LHS, Sub1);
DL, MVT::i32, RHS, Sub0);
DL, MVT::i32, RHS, Sub1);

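// Opcode table indexed as OpcMap[HasCarryIn][IsDivergent][IsAdd]: row 0 gives
// the low-half add/sub that produces the carry, row 1 the carry-consuming
// high-half opcode; divergent nodes take the VALU forms.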
static const unsigned OpcMap[2][2][2] = {
    {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
     {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
    {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
     {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
MVT::i64, RegSequenceArgs);

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
if (N->isDivergent()) {
: AMDGPU::V_SUBB_U32_e64;
N, Opc, N->getVTList(),
CurDAG->getTargetConstant(0, {}, MVT::i1) });
: AMDGPU::S_SUB_CO_PSEUDO;
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});

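// UADDO/USUBO produce a value plus a carry-out; divergent nodes use the VALU
// V_ADD_CO/V_SUB_CO forms, uniform ones the S_UADDO/S_USUBO pseudos.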
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
bool IsVALU = N->isDivergent();
if (UI.getUse().getResNo() == 1) {
unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
N, Opc, N->getVTList(),
{N->getOperand(0), N->getOperand(1),
 CurDAG->getTargetConstant(0, {}, MVT::i1) });
unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                            : AMDGPU::S_USUBO_PSEUDO;
CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                     {N->getOperand(0), N->getOperand(1)});

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
Ops[8] = N->getOperand(0);
Ops[9] = N->getOperand(4);
cast<ConstantSDNode>(Ops[0])->isZero() &&
cast<ConstantSDNode>(Ops[2])->isZero() &&
cast<ConstantSDNode>(Ops[4])->isZero();
unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
Ops[6] = N->getOperand(0);
Ops[7] = N->getOperand(3);

void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
EVT VT = N->getValueType(0);
assert(VT == MVT::f32 || VT == MVT::f64);
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);

void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
             : AMDGPU::V_MAD_U64_U32_gfx11_e64;
Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2),

void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
             : AMDGPU::V_MAD_U64_U32_gfx11_e64;
Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
MVT::i32, SDValue(Mad, 0), Sub0);
MVT::i32, SDValue(Mad, 0), Sub1);

int64_t ByteOffset = C->getSExtValue();
if (isDSOffsetLegal(SDValue(), ByteOffset)) {
Zero, Addr.getOperand(1));
if (isDSOffsetLegal(Sub, ByteOffset)) {
unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
SubOp = AMDGPU::V_SUB_U32_e64;
if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
DL, MVT::i32, Zero);

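// For DS read2/write2, each offset must be a multiple of the element size and
// fit in 8 bits once divided by it.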
bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Size) const {
if (Offset0 % Size != 0 || Offset1 % Size != 0)
if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
Addr->getFlags().hasNoUnsignedWrap()) ||

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
auto LHS = Addr.getOperand(0);
auto RHS = Addr.getOperand(1);
if (Addr.getOpcode() == ISD::ADD && (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
auto LHS = Addr.getOperand(0);
auto RHS = Addr.getOperand(1);

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
auto *RHSImm = cast<ConstantSDNode>(Addr.getOperand(1));
(RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
auto LHS = Base.getOperand(0);
auto RHS = Base.getOperand(1);

return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);

unsigned Size) const {
unsigned OffsetValue1 = OffsetValue0 + Size;
if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
unsigned OffsetValue0 = C->getZExtValue();
unsigned OffsetValue1 = OffsetValue0 + Size;
if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
SubOp = AMDGPU::V_SUB_U32_e64;
unsigned OffsetValue0 = CAddr->getZExtValue();
unsigned OffsetValue1 = OffsetValue0 + Size;
if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {

C1 = cast<ConstantSDNode>(Addr.getOperand(1));
N0 = Addr.getOperand(0);
AMDGPU::S_MOV_B32, DL, MVT::i32,
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
if (C->getSExtValue()) {

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
auto *FI = dyn_cast<FrameIndexSDNode>(N);

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
int64_t Imm = CAddr->getSExtValue();
const int64_t NullPtr =
if (Imm != NullPtr) {
AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
VAddr = SDValue(MovHighBits, 0);
if (TII->isLegalMUBUFImmOffset(C1) &&
std::tie(VAddr, SOffset) = foldFrameIndex(N0);
std::tie(VAddr, SOffset) = foldFrameIndex(Addr);

auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
if (!Reg.isPhysical())
const auto *RC = TRI.getPhysRegBaseClass(Reg);
return RC && TRI.isSGPRClass(RC);

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
SOffset = Addr.getOperand(0);
} else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
    !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
    !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
maskTrailingOnes<uint64_t>(32);

bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
SOffset = ByteOffsetNode;

assert(isa<BuildVectorSDNode>(N));

int64_t OffsetVal = 0;
bool CanHaveFlatSegmentOffsetBug =
if (isBaseWithConstantOffset64(Addr, N0, N1) &&
isFlatScratchBaseLegal(Addr))) {
int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
OffsetVal = COffsetVal;
std::tie(OffsetVal, RemainderOffset) =
    TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
if (Addr.getValueType().getSizeInBits() == 32) {
unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
AddOp = AMDGPU::V_ADD_U32_e64;
DL, MVT::i32, N0, Sub0);
DL, MVT::i32, N0, Sub1);
getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
{AddOffsetLo, SDValue(N0Lo, 0), Clamp});
AMDGPU::V_ADDC_U32_e64, DL, VTs,
MVT::i64, RegSequenceArgs),

return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,

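// Matches the FLAT global SADDR form: a uniform 64-bit base in SGPRs plus a
// small immediate; constant offsets that do not fit the encoding are split
// into an encodable immediate and a remainder materialized in a VGPR.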
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
int64_t ImmOffset = 0;
if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
ImmOffset = COffsetVal;
} else if (!LHS->isDivergent()) {
if (COffsetVal > 0) {
int64_t SplitImmOffset, RemainderOffset;
std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
if (isUInt<32>(RemainderOffset)) {
AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
unsigned NumLiterals =
if (!LHS->isDivergent()) {
if (!SAddr && !RHS->isDivergent()) {
isa<ConstantSDNode>(Addr))

if (auto *FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
auto *FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
FI->getValueType(0));

if (Addr->isDivergent())
int64_t COffsetVal = 0;
COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
SAddr = Addr.getOperand(0);
int64_t SplitImmOffset, RemainderOffset;
std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
COffsetVal = SplitImmOffset;
? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
: CurDAG->getSignedTargetConstant(RemainderOffset, DL, MVT::i32);

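// Workaround check for the flat-scratch SVS swizzle hardware bug: the test
// below flags cases where the low two bits of the maximum possible VAddr and
// SAddr values can produce a carry when added.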
bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
return (VMax & 3) + (SMax & 3) >= 4;

int64_t ImmOffset = 0;
if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
ImmOffset = COffsetVal;
} else if (!LHS->isDivergent() && COffsetVal > 0) {
int64_t SplitImmOffset, RemainderOffset;
std::tie(SplitImmOffset, RemainderOffset)
if (isUInt<32>(RemainderOffset)) {
AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
if (!isFlatScratchBaseLegal(Addr))
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
if (!LHS->isDivergent() && RHS->isDivergent()) {
} else if (!RHS->isDivergent() && LHS->isDivergent()) {
if (OrigAddr != Addr) {
if (!isFlatScratchBaseLegalSVImm(OrigAddr))
if (!isFlatScratchBaseLegalSV(OrigAddr))
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))

bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
                                                     int64_t ImmOffset) const {
if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&

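// Tries to encode an SMRD/SMEM byte offset either as an immediate or through
// an SGPR soffset operand, depending on what the subtarget's encoding allows.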
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          bool Imm32Only, bool IsBuffer,
                                          int64_t ImmOffset) const {
"Cannot match both soffset and offset at the same time!");
*SOffset = ByteOffsetNode;
return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
SDLoc SL(ByteOffsetNode);
int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
*Subtarget, ByteOffset, IsBuffer, HasSOffset);
if (EncodedOffset && Offset && !Imm32Only) {
if (EncodedOffset && Offset && Imm32Only) {
if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))

if (Addr.getValueType() != MVT::i32)
unsigned AddrHiVal = Info->get32BitAddressHighBits();

bool Imm32Only, bool IsBuffer,
int64_t ImmOffset) const {
assert(!Imm32Only && !IsBuffer);
if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true))
ImmOff = C->getSExtValue();
return SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true,

!Addr->getFlags().hasNoUnsignedWrap())
N0 = Addr.getOperand(0);
N1 = Addr.getOperand(1);
assert(N0 && N1 && isa<ConstantSDNode>(N1));
if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,

bool Imm32Only) const {
if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
SBase = Expand32BitAddress(SBase);
if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {

return SelectSMRD(Addr, SBase, nullptr, &Offset);
return SelectSMRD(Addr, SBase, nullptr, &Offset,
return SelectSMRD(Addr, SBase, &SOffset, nullptr);
return SelectSMRD(Addr, SBase, &SOffset, &Offset);
return SelectSMRDOffset(N, nullptr, &Offset,

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
return SelectSMRDOffset(N, nullptr, &Offset,

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
return N.getValueType() == MVT::i32 &&
       SelectSMRDBaseOffset(N, SOffset, nullptr,

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
if (isa<ConstantSDNode>(Index))

SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
const SDValue &Shl = N->getOperand(0);
if (0 < BVal && BVal <= CVal && CVal < 32) {

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
switch (N->getOpcode()) {
if (N->getOperand(0).getOpcode() == ISD::SRL) {
const SDValue &Srl = N->getOperand(0);
if (Shift && Mask) {
if (N->getOperand(0).getOpcode() == ISD::AND) {
if (Shift && Mask) {
} else if (N->getOperand(0).getOpcode() == ISD::SHL) {
SelectS_BFEFromShifts(N);
if (N->getOperand(0).getOpcode() == ISD::SHL) {
SelectS_BFEFromShifts(N);
const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();

bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
if (!N->hasOneUse())
MVT VT = Cond.getOperand(0).getSimpleValueType();
if (VT == MVT::i64) {

auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get();
auto Cond = VCMP.getOperand(0);

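// Uniform branches that can use SCC select S_CBRANCH_SCC0/SCC1; divergent
// conditions are ANDed with EXEC and lowered to S_CBRANCH_VCCZ/VCCNZ.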
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
if (Cond.isUndef()) {
N->getOperand(2), N->getOperand(0));
bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
bool AndExec = !UseSCCBr;
bool Negate = false;
auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
bool NegatedBallot = false;
UseSCCBr = !BallotCond->isDivergent();
Negate = Negate ^ NegatedBallot;
UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
         : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,

void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
!N->isDivergent()) {
if (Src.getValueType() == MVT::f16) {

void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
N = glueCopyToM0(N, PtrBase);
N = glueCopyToM0(N, Ptr);
N->getOperand(N->getNumOperands() - 1)

void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
                 N->getOperand(5), N->getOperand(0)};

case Intrinsic::amdgcn_ds_gws_init:
  return AMDGPU::DS_GWS_INIT;
case Intrinsic::amdgcn_ds_gws_barrier:
  return AMDGPU::DS_GWS_BARRIER;
case Intrinsic::amdgcn_ds_gws_sema_v:
  return AMDGPU::DS_GWS_SEMA_V;
case Intrinsic::amdgcn_ds_gws_sema_br:
  return AMDGPU::DS_GWS_SEMA_BR;
case Intrinsic::amdgcn_ds_gws_sema_p:
  return AMDGPU::DS_GWS_SEMA_P;
case Intrinsic::amdgcn_ds_gws_sema_release_all:
  return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;

void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
if (!Subtarget->hasGWS() ||
    (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
const bool HasVSrc = N->getNumOperands() == 4;
assert(HasVSrc || N->getNumOperands() == 3);
SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
ImmOffset = ConstOffset->getZExtValue();
glueCopyToM0(N, SDValue(M0Base, 0));

void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {

void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
unsigned IntrID = N->getConstantOperandVal(1);
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume: {
if (N->getValueType(0) != MVT::i32)
SelectDSAppendConsume(N, IntrID);
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
SelectDSBvhStackIntrinsic(N);
case Intrinsic::amdgcn_init_whole_wave:
->setInitWholeWave();

void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
unsigned IntrID = N->getConstantOperandVal(0);
unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
SDNode *ConvGlueNode = N->getGluedNode();
MVT::Glue, SDValue(ConvGlueNode, 0));
ConvGlueNode = nullptr;
case Intrinsic::amdgcn_wqm:
  Opcode = AMDGPU::WQM;
case Intrinsic::amdgcn_softwqm:
  Opcode = AMDGPU::SOFT_WQM;
case Intrinsic::amdgcn_wwm:
case Intrinsic::amdgcn_strict_wwm:
  Opcode = AMDGPU::STRICT_WWM;
case Intrinsic::amdgcn_strict_wqm:
  Opcode = AMDGPU::STRICT_WQM;
case Intrinsic::amdgcn_interp_p1_f16:
  SelectInterpP1F16(N);
case Intrinsic::amdgcn_permlane16_swap:
case Intrinsic::amdgcn_permlane32_swap: {
if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
    (IntrID == Intrinsic::amdgcn_permlane32_swap &&
Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
             ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
             : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
NewOps.push_back(SDValue(ConvGlueNode, 0));
bool FI = N->getConstantOperandVal(3);
if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
NewOps.push_back(SDValue(ConvGlueNode, 0));

void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
unsigned IntrID = N->getConstantOperandVal(1);
case Intrinsic::amdgcn_ds_gws_init:
case Intrinsic::amdgcn_ds_gws_barrier:
case Intrinsic::amdgcn_ds_gws_sema_v:
case Intrinsic::amdgcn_ds_gws_sema_br:
case Intrinsic::amdgcn_ds_gws_sema_p:
case Intrinsic::amdgcn_ds_gws_sema_release_all:
  SelectDS_GWS(N, IntrID);

void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
{N->getOperand(0), Log2WaveSize});

void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
if (N->isDivergent()) {
{SrcVal, Log2WaveSize}),

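// Peels fneg/fabs (and "0 - x" when canonicalizing) off the source value and
// records them as VOP3 source modifier bits instead.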
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            bool IsCanonicalizing,
                                            bool AllowAbs) const {
Src = Src.getOperand(0);
} else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
auto *LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
if (LHS && LHS->isZero()) {
Src = Src.getOperand(1);
if (AllowAbs && Src.getOpcode() == ISD::FABS) {
Src = Src.getOperand(0);

if (SelectVOP3ModsImpl(In, Src, Mods, true,

bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
if (SelectVOP3ModsImpl(In, Src, Mods, false,

bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
if (SelectVOP3ModsImpl(In, Src, Mods,

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {

bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
if (SelectVOP3ModsImpl(In, Src, Mods,

bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
return SelectVINTERPModsImpl(In, Src, SrcMods, false);

bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
return SelectVINTERPModsImpl(In, Src, SrcMods, true);

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
return SelectVOP3BMods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,

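// Packed (VOP3P) source modifiers: looks through bitcasts and the two halves
// of a build_vector to derive the op_sel / op_sel_hi and per-half neg bits.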
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, bool IsDOT) const {
Src = Src.getOperand(0);
unsigned VecMods = Mods;
SDValue Lo = stripBitcast(Src.getOperand(0));
SDValue Hi = stripBitcast(Src.getOperand(1));
Lo = stripBitcast(Lo.getOperand(0));
Hi = stripBitcast(Hi.getOperand(0));
unsigned VecSize = Src.getValueSizeInBits();
Lo = stripExtractLoElt(Lo);
Hi = stripExtractLoElt(Hi);
if (Lo.getValueSizeInBits() > VecSize) {
(VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
if (Hi.getValueSizeInBits() > VecSize) {
(VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
assert(Lo.getValueSizeInBits() <= VecSize &&
       Hi.getValueSizeInBits() <= VecSize);
if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
Lo.getValueType()), 0);
auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
                            : AMDGPU::SReg_64RegClassID;
Src.getValueType(), Ops), 0);
if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
                   .bitcastToAPInt().getZExtValue();

bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);

bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
unsigned SrcSign = C->getZExtValue();

bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
unsigned SrcVal = C->getZExtValue();

unsigned DstRegClass;
switch (Elts.size()) {
DstRegClass = AMDGPU::VReg_256RegClassID;
DstRegClass = AMDGPU::VReg_128RegClassID;
DstRegClass = AMDGPU::VReg_64RegClassID;
for (unsigned i = 0; i < Elts.size(); ++i) {

assert("unhandled Reg sequence size" &&
       (Elts.size() == 8 || Elts.size() == 16));
for (unsigned i = 0; i < Elts.size(); i += 2) {
SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
{Elts[i + 1], Elts[i], PackLoLo});

const SDLoc &DL, unsigned ElementSize) {
if (ElementSize == 16)
if (ElementSize == 32)

unsigned ElementSize) {
for (auto El : Elts) {
NegAbsElts.push_back(El->getOperand(0));
if (Elts.size() != NegAbsElts.size()) {

std::function<bool(SDValue)> ModifierCheck) {
dyn_cast<BuildVectorSDNode>(stripBitcast(BV->getOperand(i)))) {
for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
if (!ModifierCheck(ElF16))

bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {

bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
if (EltsF16.empty())
if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
if (EltsV2F16.empty())

bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
unsigned ModOpcode =

bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
if (auto *BV = dyn_cast<BuildVectorSDNode>(In)) {
if (isInlineImmediate(Splat.getNode())) {
unsigned Imm = C->getAPIntValue().getSExtValue();
unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
SDValue SplatSrc32 = stripBitcast(In);
if (auto *SplatSrc32BV = dyn_cast<BuildVectorSDNode>(SplatSrc32))
if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
SDValue SplatSrc16 = stripBitcast(Splat32);
if (auto *SplatSrc16BV = dyn_cast<BuildVectorSDNode>(SplatSrc16))
std::optional<APInt> RawValue;
RawValue = C->getValueAPF().bitcastToAPInt();
RawValue = C->getAPIntValue();
if (RawValue.has_value()) {
EVT VT = In.getValueType().getScalarType();
if (TII->isInlineConstant(FloatVal)) {
if (TII->isInlineConstant(RawValue.value())) {

bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,

bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
SelectVOP3ModsImpl(In, Src, Mods);
Src = Src.getOperand(0);
assert(Src.getValueType() == MVT::f16);
Src = stripBitcast(Src);
SelectVOP3ModsImpl(Src, Src, ModsTmp);

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
SelectVOP3PMadMixModsImpl(In, Src, Mods);

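// BitOp3 matching: the three source operands are assigned the masks 0xf0,
// 0xcc and 0xaa so that a nested AND/OR/XOR tree folds into a single 8-bit
// truth table (TTbl) suitable for one BITOP3 instruction.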
unsigned NumOpcodes = 0;
const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->isAllOnes()) {
for (unsigned I = 0; I < Src.size(); ++I) {
if (Src.size() == 3) {
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
if (C->isAllOnes()) {
for (unsigned I = 0; I < Src.size(); ++I) {
if (Src[I] == LHS) {
Bits = SrcBits[Src.size()];

switch (In.getOpcode()) {
if (!getOperandBits(LHS, LHSBits) ||
    !getOperandBits(RHS, RHSBits)) {
return std::make_pair(0, 0);
NumOpcodes += Op.first;
LHSBits = Op.second;
NumOpcodes += Op.first;
RHSBits = Op.second;
return std::make_pair(0, 0);

switch (In.getOpcode()) {
TTbl = LHSBits & RHSBits;
TTbl = LHSBits | RHSBits;
TTbl = LHSBits ^ RHSBits;
return std::make_pair(NumOpcodes + 1, TTbl);

unsigned NumOpcodes;
std::tie(NumOpcodes, TTbl) = BitOp3_Op(In, Src);
if (NumOpcodes < 2 || Src.empty())
if (NumOpcodes < 4 && !In->isDivergent())
if (NumOpcodes == 2 && In.getValueType() == MVT::i32) {
(In.getOperand(0).getOpcode() == In.getOpcode() ||
 In.getOperand(1).getOpcode() == In.getOpcode()))
while (Src.size() < 3)
  Src.push_back(Src[0]);
C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);

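// Decides whether an immediate must be materialized in a VGPR: returns true
// only when at least one use cannot accept an SGPR operand (checked against
// the VS_32/VS_64 operand classes, including commuted operand positions).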
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
bool AllUsesAcceptSReg = true;
Limit < 10 && U != E; ++U, ++Limit) {
getOperandRegClass(U->getUser(), U->getOperandNo());
if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
AllUsesAcceptSReg = false;
if (User->isMachineOpcode()) {
unsigned Opc = User->getMachineOpcode();
if (Desc.isCommutable()) {
unsigned OpIdx = Desc.getNumDefs() + U->getOperandNo();
unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
getOperandRegClass(U->getUser(), CommutedOpNo);
if (CommutedRC == &AMDGPU::VS_32RegClass ||
    CommutedRC == &AMDGPU::VS_64RegClass)
  AllUsesAcceptSReg = true;
if (!AllUsesAcceptSReg)
return !AllUsesAcceptSReg && (Limit < 10);

bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
const auto *Ld = cast<LoadSDNode>(N);
->isMemOpHasNoClobberedMemOperand(N)));

bool IsModified = false;
SDNode *Node = &*Position++;
if (ResNode != Node) {
} while (IsModified);