22 #define DEBUG_TYPE "pipeliner"
27     cl::desc("Swap target blocks of a conditional branch for MVE expander"));
41                        unsigned &InitVal, unsigned &LoopVal) {
42   assert(Phi.isPHI() && "Expecting a Phi.");
46   for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
47     if (Phi.getOperand(i + 1).getMBB() != Loop)
48       InitVal = Phi.getOperand(i).getReg();
50       LoopVal = Phi.getOperand(i).getReg();
52   assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
57   for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
58     if (Phi.getOperand(i + 1).getMBB() != LoopBB)
59       return Phi.getOperand(i).getReg();
65   for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
66     if (Phi.getOperand(i + 1).getMBB() == LoopBB)
67       return Phi.getOperand(i).getReg();
84   bool PhiIsSwapped = false;
89   if (UseStage != -1 && UseStage >= DefStage)
90     Diff = UseStage - DefStage;
92   if (isLoopCarried(*MI))
97   MaxDiff = std::max(Diff, MaxDiff);
99   RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
103   generatePipelinedLoop();
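// In outline, generatePipelinedLoop() drives the whole expansion: it allocates
// per-stage value maps (VRMap/VRMapPhi), emits one prolog block per stage via
// generateProlog(), clones the steady-state body into a new kernel block,
// materializes the kernel PHIs, emits the epilog blocks, splits long live
// ranges, deletes dead code and finally wires up the branches before erasing
// the original loop body.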
106 void ModuloScheduleExpander::generatePipelinedLoop() {
119   ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
124   ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
131   generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
140     unsigned StageNum = Schedule.getStage(CI);
141     MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
142     updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
144     InstrMap[NewMI] = CI;
151     updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
153     InstrMap[NewMI] = &MI;
156   NewKernel = KernelBB;
160   generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
161                        InstrMap, MaxStageCount, MaxStageCount, false);
162   generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
163                InstrMap, MaxStageCount, MaxStageCount, false);
169   generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
174   splitLifetimes(KernelBB, EpilogBBs);
177   removeDeadInstructions(KernelBB, EpilogBBs);
180   addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
191   BB->eraseFromParent();
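// generateProlog() fills the pipeline: prolog block i receives a clone of
// every original instruction whose scheduled stage is <= i, and PHIs feeding
// those clones are resolved through rewritePhiValues().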
195 void ModuloScheduleExpander::generateProlog(unsigned LastStage,
198                                             MBBVectorTy &PrologBBs) {
205   for (unsigned i = 0; i < LastStage; ++i) {
218     for (int StageNum = i; StageNum >= 0; --StageNum) {
222       if (Schedule.getStage(&*BBI) == StageNum) {
226             cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
227         updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
229         InstrMap[NewMI] = &*BBI;
233     rewritePhiValues(NewBB, i, VRMap, InstrMap);
235       dbgs() << "prolog:\n";
254 void ModuloScheduleExpander::generateEpilog(
256     ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
257     MBBVectorTy &PrologBBs) {
263   assert(!checkBranch && "generateEpilog must be able to analyze the branch");
268   if (*LoopExitI == KernelBB)
270   assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
280   int EpilogStage = LastStage + 1;
281   for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
290     if (EpilogStart == LoopExitBB)
295     for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
296       for (auto &BBI : *BB) {
300         if ((unsigned)Schedule.getStage(In) == StageNum) {
304           updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
306           InstrMap[NewMI] = In;
310     generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
311                          InstrMap, LastStage, EpilogStage, i == 1);
312     generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
313                  InstrMap, LastStage, EpilogStage, i == 1);
317     dbgs() << "epilog:\n";
328   assert((OrigBB == TBB || OrigBB == FBB) &&
329          "Unable to determine looping branch direction");
335   if (EpilogBBs.size() > 0) {
350     if (O.getParent()->getParent() != MBB)
361     if (MO.getParent()->getParent() != BB)
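// generateExistingPhis() recreates, in the kernel and epilog blocks, the PHIs
// that already existed in the original loop header. For each such PHI it
// either reuses a value recorded in VRMap for an earlier stage or builds a new
// PHI whose operands (PhiOp1/PhiOp2) come from the prolog and the previous
// stage, then retargets the scheduled users through rewriteScheduledInstr().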
369 void ModuloScheduleExpander::generateExistingPhis(
372     unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
376   unsigned PrologStage = 0;
377   unsigned PrevStage = 0;
378   bool InKernel = (LastStageNum == CurStageNum);
380     PrologStage = LastStageNum - 1;
381     PrevStage = CurStageNum;
383     PrologStage = LastStageNum - (CurStageNum - LastStageNum);
384     PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
388        BBE = BB->getFirstNonPHI();
392     unsigned InitVal = 0;
393     unsigned LoopVal = 0;
399     unsigned PhiOp2 = LoopVal;
400     if (VRMap[LastStageNum].count(LoopVal))
401       PhiOp2 = VRMap[LastStageNum][LoopVal];
403     int StageScheduled = Schedule.getStage(&*BBI);
405     unsigned NumStages = getStagesForReg(Def, CurStageNum);
406     if (NumStages == 0) {
409       unsigned NewReg = VRMap[PrevStage][LoopVal];
410       rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
412       if (VRMap[CurStageNum].count(LoopVal))
413         VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
419     unsigned MaxPhis = PrologStage + 2;
420     if (!InKernel && (int)PrologStage <= LoopValStage)
421       MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
422     unsigned NumPhis = std::min(NumStages, MaxPhis);
425     unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
432     if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
437     if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
438       StageDiff = StageScheduled - LoopValStage;
439     for (unsigned np = 0; np < NumPhis; ++np) {
443       if (np > PrologStage || StageScheduled >= (int)LastStageNum)
446       else if (PrologStage >= AccessStage + StageDiff + np &&
447                VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
448         PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
451       else if (PrologStage >= AccessStage + StageDiff + np) {
457         while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
458           int PhiStage = Schedule.getStage(InstOp1);
459           if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
464         int PhiOpStage = Schedule.getStage(InstOp1);
465         int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
466         if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
467             VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
468           PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
482       bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
487       int StageDiffAdj = 0;
488       if (LoopValStage != -1 && StageScheduled > LoopValStage)
489         StageDiffAdj = StageScheduled - LoopValStage;
492       if (np == 0 && PrevStage == LastStageNum &&
493           (StageScheduled != 0 || LoopValStage != 0) &&
494           VRMap[PrevStage - StageDiffAdj].count(LoopVal))
495         PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
498       else if (np > 0 && PrevStage == LastStageNum &&
499                VRMap[PrevStage - np + 1].count(Def))
500         PhiOp2 = VRMap[PrevStage - np + 1][Def];
502       else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
503                VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
504         PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
507       else if (VRMap[PrevStage - np].count(Def) &&
508                (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
509                 (LoopValStage == StageScheduled)))
510         PhiOp2 = VRMap[PrevStage - np][Def];
518       if (static_cast<int>(PrologStage - np) >= StageScheduled) {
519         int LVNumStages = getStagesForPhi(LoopVal);
520         int StageDiff = (StageScheduled - LoopValStage);
521         LVNumStages -= StageDiff;
523         if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
525           unsigned ReuseStage = CurStageNum;
526           if (isLoopCarried(*PhiInst))
527             ReuseStage -= LVNumStages;
530           if (VRMap[ReuseStage - np].count(LoopVal)) {
531             NewReg = VRMap[ReuseStage - np][LoopVal];
533             rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
536             VRMap[CurStageNum - np][Def] = NewReg;
538             if (VRMap[LastStageNum - np - 1].count(LoopVal))
539               PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
541         if (IsLast && np == NumPhis - 1)
547       if (InKernel && StageDiff > 0 &&
548           VRMap[CurStageNum - StageDiff - np].count(LoopVal))
549         PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
557                                    TII->get(TargetOpcode::PHI), NewReg);
561       InstrMap[NewPhi] = &*BBI;
566       unsigned PrevReg = 0;
567       if (InKernel && VRMap[PrevStage - np].count(LoopVal))
568         PrevReg = VRMap[PrevStage - np][LoopVal];
569       rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
572       if (VRMap[CurStageNum - np].count(Def)) {
573         unsigned R = VRMap[CurStageNum - np][Def];
574         rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
581       if (IsLast && np == NumPhis - 1)
589       VRMap[CurStageNum - np][Def] = NewReg;
592     while (NumPhis++ < NumStages) {
593       rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
599     if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
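// generatePhis() handles values defined by ordinary instructions in the loop
// (not by PHIs) that are still live into later stages: for each such def it
// creates the chain of PHIs that forwards the value across prolog, kernel and
// epilog copies, recording the new registers in VRMapPhi.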
607 void ModuloScheduleExpander::generatePhis(
610     InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
614   unsigned PrologStage = 0;
615   unsigned PrevStage = 0;
616   unsigned StageDiff = CurStageNum - LastStageNum;
617   bool InKernel = (StageDiff == 0);
619     PrologStage = LastStageNum - 1;
620     PrevStage = CurStageNum;
622     PrologStage = LastStageNum - StageDiff;
623     PrevStage = LastStageNum + StageDiff - 1;
627        BBE = BB->instr_end();
629     for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
634       int StageScheduled = Schedule.getStage(&*BBI);
635       assert(StageScheduled != -1 && "Expecting scheduled instruction.");
637       unsigned NumPhis = getStagesForReg(Def, CurStageNum);
641       if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
644       if (!InKernel && (unsigned)StageScheduled > PrologStage)
649         PhiOp2 = VRMap[PrevStage][Def];
651         if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
656       if (NumPhis > PrologStage + 1 - StageScheduled)
657         NumPhis = PrologStage + 1 - StageScheduled;
658       for (unsigned np = 0; np < NumPhis; ++np) {
681         unsigned PhiOp1 = VRMap[PrologStage][Def];
682         if (np <= PrologStage)
683           PhiOp1 = VRMap[PrologStage - np][Def];
685         if (PrevStage == LastStageNum && np == 0)
686           PhiOp2 = VRMap[LastStageNum][Def];
688           PhiOp2 = VRMapPhi[PrevStage - np][Def];
696                                      TII->get(TargetOpcode::PHI), NewReg);
700         InstrMap[NewPhi] = &*BBI;
705         rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
707         rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
711           VRMapPhi[PrevStage - np - 1][Def] = NewReg;
713           VRMapPhi[CurStageNum - np][Def] = NewReg;
714         if (np == NumPhis - 1)
715           rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
718         if (IsLast && np == NumPhis - 1)
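// removeDeadInstructions() sweeps the kernel and epilog blocks and erases
// instructions whose results are no longer used (inline asm and instructions
// that are unsafe to remove are kept); for PHIs, uses inside the same block do
// not count as real uses.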
730                                                  MBBVectorTy &EpilogBBs) {
738       if (MI->isInlineAsm()) {
742       bool SawStore = false;
745       if (!MI->isSafeToMove(SawStore) && !MI->isPHI()) {
759       unsigned realUses = 0;
763         if (U.getParent()->getParent() != BB) {
775       MI++->eraseFromParent();
786     MI.eraseFromParent();
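// splitLifetimes() shortens overlapping live ranges created by the kernel
// PHIs: when a kernel PHI's result is read again by a later kernel PHI, a COPY
// into a fresh register is inserted and the later uses (including those in the
// epilogs) are rewritten to the copy.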
802                                            MBBVectorTy &EpilogBBs) {
804   for (auto &PHI : KernelBB->phis()) {
811       if (I->isPHI() && I->getParent() == KernelBB) {
817     if (!MI || MI->getParent() != KernelBB || MI->isPHI())
821     unsigned SplitReg = 0;
824       if (BBJ.readsRegister(Def, nullptr)) {
829                            TII->get(TargetOpcode::COPY), SplitReg)
832         BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
837     for (auto &Epilog : EpilogBBs)
839         if (I.readsRegister(Def, nullptr))
840           I.substituteRegister(Def, SplitReg, 0, *TRI);
852   for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
853     if (MI.getOperand(i + 1).getMBB() == Incoming) {
854       MI.removeOperand(i + 1);
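// addBranches() stitches the pieces together: each prolog block gets a branch
// that either continues filling the pipeline or exits to the matching epilog
// block when there are not enough iterations left, and statically-known trip
// counts let whole prolog/epilog pairs be removed. Finally the preheader is
// repointed at the first prolog and the trip count is adjusted for the peeled
// iterations.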
865 MBBVectorTy &PrologBBs,
867 MBBVectorTy &EpilogBBs,
869   assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
876   unsigned MaxIter = PrologBBs.size() - 1;
877   for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
884     std::optional<bool> StaticallyGreater =
886     unsigned numAdded = 0;
887     if (!StaticallyGreater) {
890     } else if (*StaticallyGreater == false) {
892       Prolog->removeSuccessor(LastPro);
897       if (LastPro != LastEpi) {
901       if (LastPro == KernelBB) {
915          I != E && numAdded > 0; ++I, --numAdded)
916       updateInstruction(&*I, false, j, 0, VRMap);
920   LoopInfo->setPreheader(PrologBBs[MaxIter]);
921   LoopInfo->adjustTripCount(-(MaxIter + 1));
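// computeDelta() reports, through Delta, how much the base address register of
// MI changes per loop iteration. It follows the base register back through its
// loop PHI to the increment; scalable offsets and non-register bases make it
// fail.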
927 bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
931   bool OffsetIsScalable;
936   if (OffsetIsScalable)
939   if (!BaseOp->isReg())
947   if (BaseDef && BaseDef->isPHI()) {
949     BaseDef = MRI.getVRegDef(BaseReg);
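// updateMemOperands() keeps alias information correct for cloned memory
// instructions: when an instruction is re-emitted Num iterations away from its
// original position and the per-iteration address delta is known, each
// MachineMemOperand's offset is advanced by Delta * Num; volatile, atomic and
// invariant operands (and those without an IR value) are handled
// conservatively instead.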
965 void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
977     if (MMO->isVolatile() || MMO->isAtomic() ||
978         (MMO->isInvariant() && MMO->isDereferenceable()) ||
979         (!MMO->getValue())) {
984     if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
985       int64_t AdjOffset = Delta * Num;
999                                                  unsigned CurStageNum,
1000                                                 unsigned InstStageNum) {
1002   updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1009 MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1010     MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1012   auto It = InstrChanges.find(OldMI);
1013   if (It != InstrChanges.end()) {
1014     std::pair<unsigned, int64_t> RegAndOffset = It->second;
1015     unsigned BasePos, OffsetPos;
1019     MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1020     if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1021       NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1024   updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
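// updateInstruction() adapts a cloned instruction to its new stage: each def
// is given a fresh virtual register recorded in VRMap[CurStageNum], and each
// use is redirected to the value that the defining instruction produced in the
// appropriate earlier stage.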
1030 void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1032                                                unsigned CurStageNum,
1033                                                unsigned InstrStageNum,
1034                                                ValueMapTy *VRMap) {
1044       VRMap[CurStageNum][reg] = NewReg;
1047     } else if (MO.isUse()) {
1050       int DefStageNum = Schedule.getStage(Def);
1051       unsigned StageNum = CurStageNum;
1052       if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1054         unsigned StageDiff = (InstrStageNum - DefStageNum);
1056         StageNum -= StageDiff;
1058       if (VRMap[StageNum].count(reg))
1059         MO.setReg(VRMap[StageNum][reg]);
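// findDefInLoop() chases a value through any chain of loop-header PHIs until
// it reaches the non-PHI instruction inside the loop body that really defines
// it, guarding against cycles with a visited set.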
1067 MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
1070   while (Def->isPHI()) {
1071     if (!Visited.insert(Def).second)
1073     for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1074       if (Def->getOperand(i + 1).getMBB() == BB) {
1075         Def = MRI.getVRegDef(Def->getOperand(i).getReg());
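// getPrevMapVal() looks up the register that holds LoopVal's value for the
// previous prolog stage, recursing through the loop PHI when the value is
// itself loop-carried; it returns 0 when no earlier value exists.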
1083 unsigned ModuloScheduleExpander::getPrevMapVal(
1084     unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
1086   unsigned PrevVal = 0;
1087   if (StageNum > PhiStage) {
1089     if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1091       PrevVal = VRMap[StageNum - 1][LoopVal];
1092     else if (VRMap[StageNum].count(LoopVal))
1095       PrevVal = VRMap[StageNum][LoopVal];
1099     else if (StageNum == PhiStage + 1)
1102     else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1105           getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1106                         LoopStage, VRMap, BB);
1118                                              InstrMapTy &InstrMap) {
1119   for (auto &PHI : BB->phis()) {
1120     unsigned InitVal = 0;
1121     unsigned LoopVal = 0;
1127     unsigned NumPhis = getStagesForPhi(PhiDef);
1128     if (NumPhis > StageNum)
1130     for (unsigned np = 0; np <= NumPhis; ++np) {
1132         getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1135       rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
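// rewriteScheduledInstr() retargets the scheduled uses of OldReg: depending on
// the relative stage and cycle of the using instruction and the PHI, a use is
// rewritten either to the newly created PHI (NewReg) or to the value from the
// previous stage (PrevReg), with the register class constrained (or a COPY
// inserted) so the replacement stays legal.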
1144 void ModuloScheduleExpander::rewriteScheduledInstr(
1146     unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
1149   int StagePhi = Schedule.getStage(Phi) + PhiNum;
1164     assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1166     int StageSched = Schedule.getStage(OrigMI);
1167     int CycleSched = Schedule.getCycle(OrigMI);
1168     unsigned ReplaceReg = 0;
1170     if (StagePhi == StageSched && Phi->isPHI()) {
1171       int CyclePhi = Schedule.getCycle(Phi);
1172       if (PrevReg && InProlog)
1173         ReplaceReg = PrevReg;
1174       else if (PrevReg && !isLoopCarried(*Phi) &&
1175                (CyclePhi <= CycleSched || OrigMI->isPHI()))
1176         ReplaceReg = PrevReg;
1178         ReplaceReg = NewReg;
1182     if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1183       ReplaceReg = NewReg;
1184     if (StagePhi > StageSched && Phi->isPHI())
1185       ReplaceReg = NewReg;
1186     if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1187       ReplaceReg = NewReg;
1190       MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1192       UseOp.setReg(ReplaceReg);
1194       Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1198       UseOp.setReg(SplitReg);
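// isLoopCarried(): a header PHI is loop-carried when its loop-side operand is
// produced in a later cycle than the PHI, or in a stage that is not later than
// the PHI's own stage (see the return expression below).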
1204 bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1207   int DefCycle = Schedule.getCycle(&Phi);
1208   int DefStage = Schedule.getStage(&Phi);
1210   unsigned InitVal = 0;
1211   unsigned LoopVal = 0;
1214   if (!Use || Use->isPHI())
1218   return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1233   bool Changed = true;
1238       if (MRI.use_empty(MI.getOperand(0).getReg())) {
1241         MI.eraseFromParent();
1243       } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1245             MRI.constrainRegClass(MI.getOperand(1).getReg(),
1246                                   MRI.getRegClass(MI.getOperand(0).getReg()));
1247         assert(ConstrainRegClass &&
1248                "Expected a valid constrained register class!");
1249         (void)ConstrainRegClass;
1250         MRI.replaceRegWith(MI.getOperand(0).getReg(),
1251                            MI.getOperand(1).getReg());
1254         MI.eraseFromParent();
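// KernelRewriter is the in-place kernel rewriter used by the peeling-based
// (experimental) expander: it re-orders the loop body according to the
// schedule and, for every use whose producer lives in a later stage, threads
// the value through one PHI per stage of distance, creating those PHIs lazily
// via phi() below.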
1263 class KernelRewriter {
1299     : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1300       ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1301       TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1303   if (PreheaderBB == BB)
1307 void KernelRewriter::rewrite() {
1317     if (MI->getParent())
1318       MI->removeFromParent();
1323   assert(FirstMI && "Failed to find first MI in schedule");
1330     (I++)->eraseFromParent();
1335     if (MI.isPHI() || MI.isTerminator())
1344   EliminateDeadPhis(BB, MRI, LIS);
1350   for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1359   if (MI.getParent() != BB) {
1380   int ProducerStage = S.getStage(Producer);
1381   assert(ConsumerStage != -1 &&
1382          "In-loop consumer should always be scheduled!");
1383   assert(ConsumerStage >= ProducerStage);
1384   unsigned StageDiff = ConsumerStage - ProducerStage;
1386   for (unsigned I = 0; I < StageDiff; ++I)
1396   while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1399     LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1402   int LoopProducerStage = S.getStage(LoopProducer);
1404   std::optional<Register> IllegalPhiDefault;
1406   if (LoopProducerStage == -1) {
1408   } else if (LoopProducerStage > ConsumerStage) {
1414     int LoopProducerCycle = S.getCycle(LoopProducer);
1417     assert(LoopProducerCycle <= ConsumerCycle);
1418     assert(LoopProducerStage == ConsumerStage + 1);
1425     IllegalPhiDefault = Defaults.front();
1428     assert(ConsumerStage >= LoopProducerStage);
1429     int StageDiff = ConsumerStage - LoopProducerStage;
1430     if (StageDiff > 0) {
1432                         << " to " << (Defaults.size() + StageDiff) << "\n");
1437       Defaults.empty() ? std::optional<Register>()
1443   auto DefaultI = Defaults.rbegin();
1444   while (DefaultI != Defaults.rend())
1445     LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1447   if (IllegalPhiDefault) {
1453     auto RC = MRI.getRegClass(Reg);
1457             .addReg(*IllegalPhiDefault)
1463     S.setStage(IllegalPhi, LoopProducerStage);
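// phi() returns a register that carries InitReg on entry to the loop and
// LoopReg around the back edge, reusing an existing PHI when one with the same
// operands was already created (the Phis / UndefPhis caches) and constraining
// register classes so the reuse stays valid.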
1470 Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
1474   auto I = Phis.find({LoopReg, *InitReg});
1475   if (I != Phis.end())
1478   for (auto &KV : Phis) {
1479     if (KV.first.first == LoopReg)
1486   auto I = UndefPhis.find(LoopReg);
1487   if (I != UndefPhis.end()) {
1495     MI->getOperand(1).setReg(*InitReg);
1496     Phis.insert({{LoopReg, *InitReg}, R});
1498         MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1499     assert(ConstrainRegClass && "Expected a valid constrained register class!");
1500     (void)ConstrainRegClass;
1507     RC = MRI.getRegClass(LoopReg);
1511         MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1512     assert(ConstrainRegClass && "Expected a valid constrained register class!");
1513     (void)ConstrainRegClass;
1516           .addReg(InitReg ? *InitReg : undef(RC))
1521     UndefPhis[LoopReg] = R;
1523   Phis[{LoopReg, *InitReg}] = R;
1533   R = MRI.createVirtualRegister(RC);
1536                  TII->get(TargetOpcode::IMPLICIT_DEF), R);
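// KernelOperandInfo seems intended to normalize an operand for comparison
// between the peeled code and the classic expander's output: it walks through
// full COPYs and PHIs, remembering how many loop-carried PHI hops
// (PhiDefaults) separate the operand from its underlying source, so two
// operands compare equal when they are the same distance from a source value.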
1545 class KernelOperandInfo {
1558     while (isRegInLoop(MO)) {
1560       if (MI->isFullCopy()) {
1561         MO = &MI->getOperand(1);
1568         MO = &MI->getOperand(3);
1573         MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1574                                               : &MI->getOperand(3);
1581     return PhiDefaults.size() == Other.PhiDefaults.size();
1585     OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1592            MRI.getVRegDef(MO->getReg())->getParent() == BB;
1604   for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1620     if (Stage == -1 || Stage >= MinStage)
1633   for (auto &Sub : Subs)
1634     Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,
1639   MI->eraseFromParent();
1669   MI.removeFromParent();
1673   BlockMIs.erase({SourceBB, KernelMI});
1683   assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
1686   MI.getOperand(0).setReg(PhiReg);
1690   for (auto *P : PhiToDelete)
1691     P->eraseFromParent();
1697   DestBB->insert(InsertPt, NewMI);
1698   Register OrigR = Phi->getOperand(0).getReg();
1719     if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1734   for (unsigned I = 0; I < distance; ++I) {
1737     unsigned LoopRegIdx = 3, InitRegIdx = 1;
1743   return CanonicalUseReg;
1768   EliminateDeadPhis(ExitingBB, MRI, LIS, true);
1791     EliminateDeadPhis(B, MRI, LIS, true);
1795   for (size_t I = 0; I < Epilogs.size(); I++) {
1797     for (size_t J = I; J < Epilogs.size(); J++) {
1801       for (size_t K = Iteration; K > I; K--)
1815   for (; PI != Prologs.end(); ++PI, ++EI) {
1817     (*PI)->addSuccessor(*EI);
1821       if (Use && Use->getParent() == Pred) {
1823         if (CanonicalUse->isPHI()) {
1844     for (auto I = B->instr_rbegin();
1845          I != std::next(B->getFirstNonPHI()->getReverseIterator());)
1853       MI->eraseFromParent();
1859   EliminateDeadPhis(B, MRI, LIS);
1860   EliminateDeadPhis(ExitingBB, MRI, LIS);
1879     if (Use.getParent() != BB)
1882       Use->substituteRegister(OldR, R, 0,
1891   Exit->replacePhiUsesWith(BB, NewBB);
1898   assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1910   unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, nullptr);
1922     R = MI->getOperand(1).getReg();
1927     MI->getOperand(0).setReg(PhiR);
1933   if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1948   for (auto &Sub : Subs)
1949     Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,
1954   MI->eraseFromParent();
1959   bool KernelDisposed = false;
1968   std::optional<bool> StaticallyGreater =
1970   if (!StaticallyGreater) {
1974   } else if (*StaticallyGreater == false) {
1978     Prolog->removeSuccessor(Fallthrough);
1984     KernelDisposed = true;
1996   if (!KernelDisposed) {
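// validateAgainstModuloScheduleExpander() (behind -pipeliner-experimental-cg)
// re-runs the classic ModuloScheduleExpander and compares its "golden" kernel
// against the peeled kernel operand-by-operand using KernelOperandInfo; any
// mismatch is reported through report_fatal_error.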
2027   std::string ScheduleDump;
2034   assert(LIS && "Requires LiveIntervals!");
2039   if (!ExpandedKernel) {
2057     IllegalPhis.insert(&*NI);
2063   auto OI = ExpandedKernel->begin();
2065   for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2066     while (OI->isPHI() || OI->isFullCopy())
2068     while (NI->isPHI() || NI->isFullCopy())
2070     assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2072     for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2073          OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2075                        KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2079   for (auto &OldAndNew : KOIs) {
2080     if (OldAndNew.first == OldAndNew.second)
2083     errs() << "Modulo kernel validation error: [\n";
2084     errs() << " [golden] ";
2085     OldAndNew.first.print(errs());
2087     OldAndNew.second.print(errs());
2092   errs() << "Golden reference kernel:\n";
2094   errs() << "New kernel:\n";
2096   errs() << ScheduleDump;
2098       "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2121   if (Exit->pred_size() == 1)
2136   else if (FBB == Loop)
2142   Loop->replaceSuccessor(Exit, NewExit);
2143   TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
2146   Exit->replacePhiUsesWith(Loop, NewExit);
2156 InstrMapTy &LastStage0Insts,
2160   LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
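// The MVE (modulo variable expansion) expander unrolls the kernel NumUnroll
// times and renames registers instead of inserting cross-stage PHIs. In
// outline, generatePipelinedLoop() builds Check, Prolog, NewKernel, Epilog,
// NewPreheader and NewExit blocks; the Check block branches to the Prolog only
// when enough iterations remain, otherwise to the preheader of the remainder
// loop.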
2179 void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2282   Prolog->addSuccessor(NewKernel);
2287   Epilog->addSuccessor(NewPreheader);
2288   Epilog->addSuccessor(NewExit);
2290   InstrMapTy LastStage0Insts;
2291   insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
2292                    LastStage0Insts, *Prolog, *NewPreheader);
2297   generateProlog(PrologVRMap);
2298   generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2299   generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
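// updateInstrUse() rewrites one use operand of a copied instruction: the
// defining value is looked up DiffStage phases earlier, first in the value map
// of the block currently being generated (CurVRMap) and, failing that, in the
// map of the previously generated block (PrevVRMap).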
2303 void ModuloScheduleExpanderMVE::updateInstrUse(
2314     if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2319     if (!DefInst || DefInst->getParent() != OrigKernel)
2321     unsigned InitReg = 0;
2322     unsigned DefReg = OrigReg;
2323     if (DefInst->isPHI()) {
2326       getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2329       DefInst = MRI.getVRegDef(LoopReg);
2331     unsigned DefStageNum = Schedule.getStage(DefInst);
2332     DiffStage += StageNum - DefStageNum;
2334     if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
2336       NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2337     else if (!PrevVRMap)
2346       NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
2349     MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
2351       UseMO.setReg(NewReg);
2353       Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2354       BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
2357       UseMO.setReg(SplitReg);
2366   unsigned InitVal, LoopVal;
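// generatePhi() is only needed while the pipeline is still being filled: when
// a value used by a later unrolled copy was produced in the prolog, a kernel
// PHI is created that merges the prolog register with the kernel register, and
// the result is recorded in PhiVRMap for subsequent copies.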
2375 void ModuloScheduleExpanderMVE::generatePhi(
2380   int StageNum = Schedule.getStage(OrigMI);
2382   if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2383     UsePrologReg = true;
2384   else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
2385     UsePrologReg = false;
2421     if (!DefMO.isReg() || DefMO.isDead())
2424     auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2425     if (NewReg == KernelVRMap[UnrollNum].end())
2429       int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
2430       CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2439     Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2441             TII->get(TargetOpcode::PHI), PhiReg)
2446     PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2452   for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
2453     if (Phi.getOperand(Idx).getReg() == OrigReg) {
2454       Phi.getOperand(Idx).setReg(NewReg);
2455       Phi.getOperand(Idx + 1).setMBB(NewMBB);
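// mergeRegUsesAfterPipeline() appears to repair uses of an original loop value
// that live outside the pipelined blocks: such uses are routed through a new
// PHI merging the original and pipelined definitions, and loop-header PHIs
// that consumed the value get a new init PHI built from the same pair.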
2462 void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
2470     if (O.getParent()->getParent() != OrigKernel &&
2471         O.getParent()->getParent() != Prolog &&
2472         O.getParent()->getParent() != NewKernel &&
2473         O.getParent()->getParent() != Epilog)
2475     if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
2481   if (!UsesAfterLoop.empty()) {
2482     Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2484                 TII->get(TargetOpcode::PHI), PhiReg)
2499   if (!LoopPhis.empty()) {
2501     unsigned InitReg, LoopReg;
2502     getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2503     Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
2505                 TII->get(TargetOpcode::PHI), NewInit)
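// generateProlog(), generateKernel() and generateEpilog() below follow the
// same pattern: clone the instructions that belong to the phase being emitted,
// record new defs in the per-phase value map (updateInstrDef), and only then
// fix up the uses (updateInstrUse) once the whole phase has been copied.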
2515 void ModuloScheduleExpanderMVE::generateProlog(
2517   PrologVRMap.clear();
2520   for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
2526       if (StageNum > PrologNum)
2529       updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
2530       NewMIMap[NewMI] = {PrologNum, StageNum};
2531       Prolog->push_back(NewMI);
2535   for (auto I : NewMIMap) {
2537     int PrologNum = I.second.first;
2538     int StageNum = I.second.second;
2539     updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
2543     dbgs() << "prolog:\n";
2548 void ModuloScheduleExpanderMVE::generateKernel(
2551   KernelVRMap.clear();
2552   KernelVRMap.resize(NumUnroll);
2554   PhiVRMap.resize(NumUnroll);
2557   for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2563       if (UnrollNum == NumUnroll - 1)
2564         LastStage0Insts[MI] = NewMI;
2565       updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2566                      (UnrollNum == NumUnroll - 1 && StageNum == 0));
2567       generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2568       NewMIMap[NewMI] = {UnrollNum, StageNum};
2573   for (auto I : NewMIMap) {
2575     int UnrollNum = I.second.first;
2576     int StageNum = I.second.second;
2577     updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2581   insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2585     dbgs() << "kernel:\n";
2590 void ModuloScheduleExpanderMVE::generateEpilog(
2593   EpilogVRMap.clear();
2596   for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
2602       if (StageNum <= EpilogNum)
2605       updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2606       NewMIMap[NewMI] = {EpilogNum, StageNum};
2607       Epilog->push_back(NewMI);
2611   for (auto I : NewMIMap) {
2613     int EpilogNum = I.second.first;
2614     int StageNum = I.second.second;
2615     updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2622   insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2625     dbgs() << "epilog:\n";
2631 void ModuloScheduleExpanderMVE::calcNumUnroll() {
2648     int NumUnrollLocal = 1;
2656       if (Inst2Idx[MI] <= Inst2Idx[DefMI])
2658     NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2667 void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
2677     VRMap[Reg] = NewReg;
2679     mergeRegUsesAfterPipeline(Reg, NewReg);
2690   generatePipelinedLoop();
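// canApply() gates MVE expansion: the loop must have a single exit block,
// every header PHI result may only be used inside the loop and not by another
// PHI, each loop-carried PHI source must be a virtual register defined inside
// the loop, and no loop-defined value may feed more than one PHI.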
2695   if (!L.getExitBlock()) {
2696     LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n");
2712       if (Ref.getParent() != BB || Ref.isPHI()) {
2713         LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is "
2714                              "referenced outside of the loop or by phi.\n");
2721     unsigned InitVal, LoopVal;
2723     if (!Register(LoopVal).isVirtual() ||
2724         MRI.getVRegDef(LoopVal)->getParent() != BB) {
2726           dbgs() << "Can not apply MVE expander: A phi source value coming "
2727                     "from the loop is not defined in the loop.\n");
2730     if (UsedByPhi.count(LoopVal)) {
2731       LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2732                            "loop is referenced by two or more phis.\n");
2735     UsedByPhi.insert(LoopVal);
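// ModuloScheduleTest is a testing pass: it rebuilds a ModuloSchedule for a
// single-block loop from post-instruction symbols of the form
// "Stage-<n>_Cycle-<m>" attached to each instruction (see parseSymbolString)
// and then runs the expander on it, which makes the expansion logic testable
// from MIR without running the full pipeliner.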
2774 char ModuloScheduleTest::ID = 0;
2777                       "Modulo Schedule test pass", false, false)
2784   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
2785   for (auto *L : MLI) {
2786     if (L->getTopBlock() != L->getBottomBlock())
2795   std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2796   std::pair<StringRef, StringRef> StageTokenAndValue =
2797       getToken(StageAndCycle.first, "-");
2798   std::pair<StringRef, StringRef> CycleTokenAndValue =
2799       getToken(StageAndCycle.second, "-");
2800   if (StageTokenAndValue.first != "Stage" ||
2801       CycleTokenAndValue.first != "_Cycle") {
2803       "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2807   StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2808   CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2810   dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2814   LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
2816   dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2819   std::vector<MachineInstr *> Instrs;
2821     if (MI.isTerminator())
2823     Instrs.push_back(&MI);
2825     dbgs() << "Parsing post-instr symbol for " << MI;
2848   MI->setPostInstrSymbol(MF, Sym);