//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Hexagon specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "HexagonSubtarget.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <map>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "hexagon-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "HexagonGenSubtargetInfo.inc"

static cl::opt<bool> EnableBSBSched("enable-bsb-sched", cl::Hidden,
                                    cl::init(true));

static cl::opt<bool> EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden,
                                          cl::init(false));

static cl::opt<bool>
    EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::init(true),
                      cl::desc("Enable the scheduler to generate .cur"));

static cl::opt<bool>
    DisableHexagonMISched("disable-hexagon-misched", cl::Hidden,
                          cl::desc("Disable Hexagon MI Scheduling"));

59 "hexagon-long-calls", cl::Hidden,
60 cl::desc("If present, forces/disables the use of long calls"));

static cl::opt<bool>
    EnablePredicatedCalls("hexagon-pred-calls", cl::Hidden,
                          cl::desc("Consider calls to be predicable"));

static cl::opt<bool> SchedPredsCloser("sched-preds-closer", cl::Hidden,
                                      cl::init(true));

static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
                                             cl::Hidden, cl::init(true));

73 "hexagon-check-bank-conflict", cl::Hidden, cl::init(true),
74 cl::desc("Enable checking for cache bank conflicts"));
75
HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
                                   StringRef FS, const TargetMachine &TM)
    : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
      OptLevel(TM.getOptLevel()),
      CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
      TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
      RegInfo(getHwMode()), TLInfo(TM, *this),
      InstrItins(getInstrItineraryForCPU(CPUString)) {
  Hexagon_MC::addArchSubtarget(this, FS);
  // Beware of the default constructor of InstrItineraryData: it will
  // reset all members to 0.
  assert(InstrItins.Itineraries != nullptr && "InstrItins not initialized");
}

HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
  std::optional<Hexagon::ArchEnum> ArchVer = Hexagon::getCpu(CPUString);
  if (ArchVer)
    HexagonArchVersion = *ArchVer;
  else
    llvm_unreachable("Unrecognized Hexagon processor version");

  UseHVX128BOps = false;
  UseHVX64BOps = false;
  UseAudioOps = false;
  UseLongCalls = false;

  SubtargetFeatures Features(FS);

  // Turn on QFloat if the HVX version is v68+.
  // The function ParseSubtargetFeatures will set feature bits and initialize
  // subtarget's variables all in one, so there isn't a good way to preprocess
  // the feature string, other than by tinkering with it directly.
  auto IsQFloatFS = [](StringRef F) {
    return F == "+hvx-qfloat" || F == "-hvx-qfloat";
  };
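  // For example, a feature string of "+hvxv68" with no explicit qfloat
  // setting becomes "+hvxv68,+hvx-qfloat" after the block below, while
  // "+hvxv66" or "-hvx" is left unchanged.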
  if (!llvm::count_if(Features.getFeatures(), IsQFloatFS)) {
    auto getHvxVersion = [&Features](StringRef FS) -> StringRef {
      for (StringRef F : llvm::reverse(Features.getFeatures())) {
        if (F.starts_with("+hvxv"))
          return F;
      }
      for (StringRef F : llvm::reverse(Features.getFeatures())) {
        if (F == "-hvx")
          return StringRef();
        if (F.starts_with("+hvx") || F == "-hvx")
          return F.take_front(4); // Return "+hvx" or "-hvx".
      }
      return StringRef();
    };

    bool AddQFloat = false;
    StringRef HvxVer = getHvxVersion(FS);
    if (HvxVer.starts_with("+hvxv")) {
      int Ver = 0;
      if (!HvxVer.drop_front(5).consumeInteger(10, Ver) && Ver >= 68)
        AddQFloat = true;
    } else if (HvxVer == "+hvx") {
      if (hasV68Ops())
        AddQFloat = true;
    }

    if (AddQFloat)
      Features.AddFeature("+hvx-qfloat");
  }

  std::string FeatureString = Features.getString();
  ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FeatureString);

  if (useHVXV68Ops())
    UseHVXFloatingPoint = UseHVXIEEEFPOps || UseHVXQFloatOps;

  if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint)
    LLVM_DEBUG(dbgs() << "Behavior is undefined for simultaneous qfloat and "
                         "ieee hvx codegen...");

  if (OverrideLongCalls.getPosition())
    UseLongCalls = OverrideLongCalls;

  UseBSBScheduling = hasV60Ops() && EnableBSBSched;

  if (isTinyCore()) {
    // Tiny core has a single thread, so back-to-back scheduling is enabled by
    // default.
    if (!EnableBSBSched.getPosition())
      UseBSBScheduling = false;
  }

  FeatureBitset FeatureBits = getFeatureBits();
  if (HexagonDisableDuplex)
    setFeatureBits(FeatureBits.reset(Hexagon::FeatureDuplex));
  setFeatureBits(Hexagon_MC::completeHVXFeatures(FeatureBits));

  return *this;
}

bool HexagonSubtarget::isHVXElementType(MVT Ty, bool IncludeBool) const {
  if (!useHVXOps())
    return false;
  if (Ty.isVector())
    Ty = Ty.getVectorElementType();
  if (IncludeBool && Ty == MVT::i1)
    return true;
  ArrayRef<MVT> ElemTypes = getHVXElementTypes();
  return llvm::is_contained(ElemTypes, Ty);
}

bool HexagonSubtarget::isHVXVectorType(EVT VecTy, bool IncludeBool) const {
  if (!VecTy.isSimple())
    return false;
  if (!VecTy.isVector() || !useHVXOps() || VecTy.isScalableVector())
    return false;
  MVT ElemTy = VecTy.getSimpleVT().getVectorElementType();
  if (!IncludeBool && ElemTy == MVT::i1)
    return false;

  unsigned HwLen = getVectorLength();
  unsigned NumElems = VecTy.getVectorNumElements();
  ArrayRef<MVT> ElemTypes = getHVXElementTypes();

  if (IncludeBool && ElemTy == MVT::i1) {
    // Boolean HVX vector types are formed from regular HVX vector types
    // by replacing the element type with i1.
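    // E.g. in 128-byte HVX mode (HwLen = 128, i.e. 1024 bits), v128i1,
    // v64i1, and v32i1 qualify, mirroring v128i8, v64i16, and v32i32.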
    for (MVT T : ElemTypes)
      if (NumElems * T.getSizeInBits() == 8 * HwLen)
        return true;
    return false;
  }

  unsigned VecWidth = VecTy.getSizeInBits();
  if (VecWidth != 8 * HwLen && VecWidth != 16 * HwLen)
    return false;
  return llvm::is_contained(ElemTypes, ElemTy);
}

bool HexagonSubtarget::isTypeForHVX(Type *VecTy, bool IncludeBool) const {
  if (!VecTy->isVectorTy() || isa<ScalableVectorType>(VecTy))
    return false;
  // Avoid types like <2 x i32*>.
  Type *ScalTy = VecTy->getScalarType();
  if (!ScalTy->isIntegerTy() &&
      !(ScalTy->isFloatingPointTy() && useHVXFloatingPoint()))
    return false;
  // The given type may be something like <17 x i32>, which is not MVT,
  // but can be represented as (non-simple) EVT.
  EVT Ty = EVT::getEVT(VecTy, /*HandleUnknown*/false);
  if (!Ty.getVectorElementType().isSimple())
    return false;

  auto isHvxTy = [this, IncludeBool](MVT SimpleTy) {
    if (isHVXVectorType(SimpleTy, IncludeBool))
      return true;
    auto Action = getTargetLowering()->getPreferredVectorAction(SimpleTy);
    return Action == TargetLoweringBase::TypeWidenVector;
  };

  // Round up EVT to have power-of-2 elements, and keep checking if it
  // qualifies for HVX, dividing it in half after each step.
  MVT ElemTy = Ty.getVectorElementType().getSimpleVT();
  unsigned VecLen = PowerOf2Ceil(Ty.getVectorNumElements());
  while (VecLen > 1) {
    MVT SimpleTy = MVT::getVectorVT(ElemTy, VecLen);
    if (SimpleTy.isValid() && isHvxTy(SimpleTy))
      return true;
    VecLen /= 2;
  }

  return false;
}

void HexagonSubtarget::UsrOverflowMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    if (!SU.isInstr())
      continue;
    SmallVector<SDep, 4> Erase;
    for (auto &D : SU.Preds)
      if (D.getKind() == SDep::Output && D.getReg() == Hexagon::USR_OVF)
        Erase.push_back(D);
    for (auto &E : Erase)
      SU.removePred(E);
  }
}

void HexagonSubtarget::HVXMemLatencyMutation::apply(ScheduleDAGInstrs *DAG) {
  for (SUnit &SU : DAG->SUnits) {
    // Update the latency of chain edges between v60 vector load or store
    // instructions to be 1. These instructions cannot be scheduled in the
    // same packet.
    MachineInstr &MI1 = *SU.getInstr();
    auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII);
    bool IsStoreMI1 = MI1.mayStore();
    bool IsLoadMI1 = MI1.mayLoad();
    if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1))
      continue;
    for (SDep &SI : SU.Succs) {
      if (SI.getKind() != SDep::Order || SI.getLatency() != 0)
        continue;
      MachineInstr &MI2 = *SI.getSUnit()->getInstr();
      if (!QII->isHVXVec(MI2))
        continue;
      if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) {
        SI.setLatency(1);
        SU.setHeightDirty();
        // Change the dependence in the opposite direction too.
        for (SDep &PI : SI.getSUnit()->Preds) {
          if (PI.getSUnit() != &SU || PI.getKind() != SDep::Order)
            continue;
          PI.setLatency(1);
          SI.getSUnit()->setDepthDirty();
        }
      }
    }
  }
}

// Check if a call and subsequent A2_tfrpi instructions should maintain
// scheduling affinity. We are looking for the TFRI to be consumed in
// the next instruction. This should help reduce the instances of
// double register pairs being allocated and scheduled before a call
// when not used until after the call. This situation is exacerbated
// by the fact that we allocate the pair from the callee saves list,
// leading to excess spills and restores.
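//
// For intuition, a sketch of the pattern being avoided (hypothetical MIR,
// not taken from a real trace):
//   %d0 = A2_tfrpi 0        ; 64-bit pair materialized well before its use
//   <call>                  ; pair now live across the call, forcing a
//                           ; callee-saved pair plus spill/restore code
//   ... = use of %d0        ; first real use, only after the call
// Binding the TFRI to the preceding call keeps def and use close together.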
bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
      const HexagonInstrInfo &HII, const SUnit &Inst1,
      const SUnit &Inst2) const {
  if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
    return false;

  // TypeXTYPE are 64 bit operations.
  unsigned Type = HII.getType(*Inst2.getInstr());
  return Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
         Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM;
}

void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
  SUnit* LastSequentialCall = nullptr;
  // Map from a virtual register to the physical register it was copied from.
  DenseMap<unsigned, unsigned> VRegHoldingReg;
  // Map from the physical register to the instruction that uses the virtual
  // register. This is used to create the barrier edge.
  DenseMap<unsigned, SUnit *> LastVRegUse;
  auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
  auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();

  // Currently we only catch the situation when a compare gets scheduled
  // before the preceding call.
  for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
    // Remember the call.
    if (DAG->SUnits[su].getInstr()->isCall())
      LastSequentialCall = &DAG->SUnits[su];
    // Look for a compare that defines a predicate.
    else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
      DAG->addEdge(&DAG->SUnits[su], SDep(LastSequentialCall, SDep::Barrier));
    // Look for call and tfri* instructions.
    else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
             shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
      DAG->addEdge(&DAG->SUnits[su], SDep(&DAG->SUnits[su-1], SDep::Barrier));
    // Prevent redundant register copies due to reads and writes of physical
    // registers. The original motivation for this was the code generated
    // between two calls, which was caused by both the return value and the
    // argument for the next call being in %r0.
    // Example:
    //   1: <call1>
    //   2: %vreg = COPY %r0
    //   3: <use of %vreg>
    //   4: %r0 = ...
    //   5: <call2>
    // The scheduler would often swap 3 and 4, so an additional register is
    // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
    // this.
    // The code below checks for all the physical registers, not just R0/D0/V0.
    else if (SchedRetvalOptimization) {
      const MachineInstr *MI = DAG->SUnits[su].getInstr();
      if (MI->isCopy() && MI->getOperand(1).getReg().isPhysical()) {
        // %vregX = COPY %r0
        VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
        LastVRegUse.erase(MI->getOperand(1).getReg());
      } else {
        for (const MachineOperand &MO : MI->operands()) {
          if (!MO.isReg())
            continue;
          if (MO.isUse() && !MI->isCopy() &&
              VRegHoldingReg.count(MO.getReg())) {
            // <use of %vregX>
            LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su];
          } else if (MO.isDef() && MO.getReg().isPhysical()) {
            for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid();
                 ++AI) {
              if (LastVRegUse.count(*AI) &&
                  LastVRegUse[*AI] != &DAG->SUnits[su])
                // %r0 = ...
                DAG->addEdge(&DAG->SUnits[su],
                             SDep(LastVRegUse[*AI], SDep::Barrier));
              LastVRegUse.erase(*AI);
            }
          }
        }
      }
    }
  }
}

void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
  if (!EnableCheckBankConflict)
    return;

  const auto &HII = static_cast<const HexagonInstrInfo&>(*DAG->TII);

  // Create artificial edges between loads that could likely cause a bank
  // conflict. Since such loads would normally not have any dependency
  // between them, we cannot rely on existing edges.
  for (unsigned i = 0, e = DAG->SUnits.size(); i != e; ++i) {
    SUnit &S0 = DAG->SUnits[i];
    MachineInstr &L0 = *S0.getInstr();
    if (!L0.mayLoad() || L0.mayStore() ||
        HII.getAddrMode(L0) != HexagonII::BaseImmOffset)
      continue;
    int64_t Offset0;
    LocationSize Size0 = 0;
    MachineOperand *BaseOp0 = HII.getBaseAndOffset(L0, Offset0, Size0);
    // If the access size is longer than the L1 cache line, skip the check.
    if (BaseOp0 == nullptr || !BaseOp0->isReg() || !Size0.hasValue() ||
        Size0.getValue() >= 32)
      continue;
    // Scan only up to 32 instructions ahead (to avoid n^2 complexity).
    for (unsigned j = i+1, m = std::min(i+32, e); j != m; ++j) {
      SUnit &S1 = DAG->SUnits[j];
      MachineInstr &L1 = *S1.getInstr();
      if (!L1.mayLoad() || L1.mayStore() ||
          HII.getAddrMode(L1) != HexagonII::BaseImmOffset)
        continue;
      int64_t Offset1;
      LocationSize Size1 = 0;
      MachineOperand *BaseOp1 = HII.getBaseAndOffset(L1, Offset1, Size1);
      if (BaseOp1 == nullptr || !BaseOp1->isReg() || !Size1.hasValue() ||
          Size1.getValue() >= 32 || BaseOp0->getReg() != BaseOp1->getReg())
        continue;
      // Check bits 3 and 4 of the offset: if they differ, a bank conflict
      // is unlikely.
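      // E.g. offsets 0 and 32 agree in bits 3-4 ((0 ^ 32) & 0x18 == 0), so
      // they may hit the same bank and get an edge below; offsets 8 and 16
      // differ ((8 ^ 16) & 0x18 == 0x18) and are skipped.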
      if (((Offset0 ^ Offset1) & 0x18) != 0)
        continue;
      // Bits 3 and 4 are the same, add an artificial edge and set extra
      // latency.
      SDep A(&S0, SDep::Artificial);
      A.setLatency(1);
      S1.addPred(A, true);
    }
  }
}

/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
bool HexagonSubtarget::useAA() const {
  if (OptLevel != CodeGenOptLevel::None)
    return true;
  return false;
}

/// Perform target specific adjustments to the latency of a schedule
/// dependency.
void HexagonSubtarget::adjustSchedDependency(
    SUnit *Src, int SrcOpIdx, SUnit *Dst, int DstOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
  if (!Src->isInstr() || !Dst->isInstr())
    return;

  MachineInstr *SrcInst = Src->getInstr();
  MachineInstr *DstInst = Dst->getInstr();
  const HexagonInstrInfo *QII = getInstrInfo();

  // Instructions with .new operands have zero latency.
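  // (Illustrative packet, assumed syntax: in "{ r0 = add(r1, r2);
  // memw(r3) = r0.new }" the store reads r0 in the cycle it is produced,
  // so the def-use edge effectively costs zero cycles.)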
  SmallSet<SUnit *, 4> ExclSrc;
  SmallSet<SUnit *, 4> ExclDst;
  if (QII->canExecuteInBundle(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }

  // Set the latency for a copy to zero since we hope that it will get
  // removed.
  if (DstInst->isCopy())
    Dep.setLatency(0);

  // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine
  // the correct latency.
  // If there are multiple uses of the def of COPY/REG_SEQUENCE, set the latency
  // only if the latencies on all the uses are equal, otherwise set it to
  // zero.
  if (DstInst->isRegSequence() || DstInst->isCopy()) {
    Register DReg = DstInst->getOperand(0).getReg();
    std::optional<unsigned> DLatency;
    for (const auto &DDep : Dst->Succs) {
      MachineInstr *DDst = DDep.getSUnit()->getInstr();
      int UseIdx = -1;
      for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
        const MachineOperand &MO = DDst->getOperand(OpNum);
        if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) {
          UseIdx = OpNum;
          break;
        }
      }

      if (UseIdx == -1)
        continue;

      std::optional<unsigned> Latency =
          InstrInfo.getOperandLatency(&InstrItins, *SrcInst, 0, *DDst, UseIdx);

      // Set DLatency for the first time.
      if (!DLatency)
        DLatency = Latency;

      // For multiple uses, if the Latency is different across uses, reset
      // DLatency.
      if (DLatency != Latency) {
        DLatency = std::nullopt;
        break;
      }
    }
    Dep.setLatency(DLatency ? *DLatency : 0);
  }

  // Try to schedule uses near definitions to generate .cur.
  ExclSrc.clear();
  ExclDst.clear();
  if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) &&
      isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) {
    Dep.setLatency(0);
    return;
  }
  int Latency = Dep.getLatency();
  bool IsArtificial = Dep.isArtificial();
  Latency = updateLatency(*SrcInst, *DstInst, IsArtificial, Latency);
  Dep.setLatency(Latency);
}

void HexagonSubtarget::getPostRAMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<UsrOverflowMutation>());
  Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
  Mutations.push_back(std::make_unique<BankConflictMutation>());
}

void HexagonSubtarget::getSMSMutations(
    std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
  Mutations.push_back(std::make_unique<UsrOverflowMutation>());
  Mutations.push_back(std::make_unique<HVXMemLatencyMutation>());
}

// Pin the vtable to this file.
void HexagonSubtarget::anchor() {}

bool HexagonSubtarget::enableMachineScheduler() const {
  if (DisableHexagonMISched.getNumOccurrences())
    return !DisableHexagonMISched;
  return true;
}

bool HexagonSubtarget::usePredicatedCalls() const {
  return EnablePredicatedCalls;
}

int HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
                                    MachineInstr &DstInst, bool IsArtificial,
                                    int Latency) const {
  if (IsArtificial)
    return 1;
  if (!hasV60Ops())
    return Latency;

  auto &QII = static_cast<const HexagonInstrInfo &>(*getInstrInfo());
  // BSB scheduling.
  if (QII.isHVXVec(SrcInst) || useBSBScheduling())
    Latency = (Latency + 1) >> 1;
  return Latency;
}

void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
  MachineInstr *SrcI = Src->getInstr();
  for (auto &I : Src->Succs) {
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    Register DepR = I.getReg();
    int DefIdx = -1;
    for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = SrcI->getOperand(OpNum);
      bool IsSameOrSubReg = false;
      if (MO.isReg()) {
        Register MOReg = MO.getReg();
        if (DepR.isVirtual()) {
          IsSameOrSubReg = (MOReg == DepR);
        } else {
          IsSameOrSubReg = getRegisterInfo()->isSubRegisterEq(DepR, MOReg);
        }
        if (MO.isDef() && IsSameOrSubReg)
          DefIdx = OpNum;
      }
    }
    assert(DefIdx >= 0 && "Def Reg not found in Src MI");
    MachineInstr *DstI = Dst->getInstr();
    SDep T = I;
    for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) {
      const MachineOperand &MO = DstI->getOperand(OpNum);
      if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) {
        std::optional<unsigned> Latency = InstrInfo.getOperandLatency(
            &InstrItins, *SrcI, DefIdx, *DstI, OpNum);

        // For some instructions (ex: COPY), we might end up with < 0 latency
        // as they don't have any Itinerary class associated with them.
        if (!Latency)
          Latency = 0;
        bool IsArtificial = I.isArtificial();
        Latency = updateLatency(*SrcI, *DstI, IsArtificial, *Latency);
        I.setLatency(*Latency);
      }
    }

    // Update the latency of the opposite edge too.
    T.setSUnit(Src);
    auto F = find(Dst->Preds, T);
    assert(F != Dst->Preds.end());
    F->setLatency(I.getLatency());
  }
}

/// Change the latency between the two SUnits.
void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat)
      const {
  for (auto &I : Src->Succs) {
    if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
      continue;
    SDep T = I;
    I.setLatency(Lat);

    // Update the latency of the opposite edge too.
    T.setSUnit(Src);
    auto F = find(Dst->Preds, T);
    assert(F != Dst->Preds.end());
    F->setLatency(Lat);
  }
}

/// If the SUnit has a zero latency edge, return the other SUnit.
static SUnit *getZeroLatency(SUnit *N, SmallVector<SDep, 4> &Deps) {
  for (auto &I : Deps)
    if (I.isAssignedRegDep() && I.getLatency() == 0 &&
        !I.getSUnit()->getInstr()->isPseudo())
      return I.getSUnit();
  return nullptr;
}

// Return true if these are the best two instructions to schedule
// together with a zero latency. Only one dependence should have a zero
// latency. If there are multiple choices, choose the best, and change
// the others, if needed.
bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst,
      const HexagonInstrInfo *TII, SmallSet<SUnit*, 4> &ExclSrc,
      SmallSet<SUnit*, 4> &ExclDst) const {
  MachineInstr &SrcInst = *Src->getInstr();
  MachineInstr &DstInst = *Dst->getInstr();

  // Ignore Boundary SU nodes as these have null instructions.
  if (Dst->isBoundaryNode())
    return false;

  if (SrcInst.isPHI() || DstInst.isPHI())
    return false;

  if (!TII->isToBeScheduledASAP(SrcInst, DstInst) &&
      !TII->canExecuteInBundle(SrcInst, DstInst))
    return false;

  // The architecture doesn't allow three dependent instructions in the same
  // packet. So, if the destination has a zero latency successor, then it's
  // not a candidate for a zero latency predecessor.
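  // (E.g. if Dst already feeds some successor with latency 0, making the
  // Src->Dst edge zero-latency as well would imply packing all three
  // instructions together, which the hardware cannot do.)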
  if (getZeroLatency(Dst, Dst->Succs) != nullptr)
    return false;

  // Check if the Dst instruction is the best candidate first.
  SUnit *Best = nullptr;
  SUnit *DstBest = nullptr;
  SUnit *SrcBest = getZeroLatency(Dst, Dst->Preds);
  if (SrcBest == nullptr || Src->NodeNum >= SrcBest->NodeNum) {
    // Check that Src doesn't have a better candidate.
    DstBest = getZeroLatency(Src, Src->Succs);
    if (DstBest == nullptr || Dst->NodeNum <= DstBest->NodeNum)
      Best = Dst;
  }
  if (Best != Dst)
    return false;

  // The caller frequently adds the same dependence twice. If so, then
  // return true for this case too.
  if ((Src == SrcBest && Dst == DstBest) ||
      (SrcBest == nullptr && Dst == DstBest) ||
      (Src == SrcBest && Dst == nullptr))
    return true;

  // Reassign the latency for the previous bests, which requires setting
  // the dependence edge in both directions.
  if (SrcBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(SrcBest, Dst, 1);
    else
      restoreLatency(SrcBest, Dst);
  }
  if (DstBest != nullptr) {
    if (!hasV60Ops())
      changeLatency(Src, DstBest, 1);
    else
      restoreLatency(Src, DstBest);
  }

  // Attempt to find another opportunity for zero latency in a different
  // dependence.
  if (SrcBest && DstBest)
    // If there is an edge from SrcBest to DstBest, then try to change that
    // to 0 now.
    changeLatency(SrcBest, DstBest, 0);
  else if (DstBest) {
    // Check if the previous best destination instruction has a new zero
    // latency dependence opportunity.
    ExclSrc.insert(Src);
    for (auto &I : DstBest->Preds)
      if (ExclSrc.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(I.getSUnit(), DstBest, TII, ExclSrc, ExclDst))
        changeLatency(I.getSUnit(), DstBest, 0);
  } else if (SrcBest) {
    // Check if the previous best source instruction has a new zero latency
    // dependence opportunity.
    ExclDst.insert(Dst);
    for (auto &I : SrcBest->Succs)
      if (ExclDst.count(I.getSUnit()) == 0 &&
          isBestZeroLatency(SrcBest, I.getSUnit(), TII, ExclSrc, ExclDst))
        changeLatency(SrcBest, I.getSUnit(), 0);
  }

  return true;
}

unsigned HexagonSubtarget::getL1CacheLineSize() const {
  return 32;
}

unsigned HexagonSubtarget::getL1PrefetchDistance() const {
  return 32;
}

bool HexagonSubtarget::enableSubRegLiveness() const { return true; }

Intrinsic::ID HexagonSubtarget::getIntrinsicId(unsigned Opc) const {
  struct Scalar {
    unsigned Opcode;
    Intrinsic::ID IntId;
  };
  struct Hvx {
    unsigned Opcode;
    Intrinsic::ID Int64Id, Int128Id;
  };

  static Scalar ScalarInts[] = {
#define GET_SCALAR_INTRINSICS
#include "HexagonDepInstrIntrinsics.inc"
#undef GET_SCALAR_INTRINSICS
  };

  static Hvx HvxInts[] = {
#define GET_HVX_INTRINSICS
#include "HexagonDepInstrIntrinsics.inc"
#undef GET_HVX_INTRINSICS
  };

  const auto CmpOpcode = [](auto A, auto B) { return A.Opcode < B.Opcode; };
  [[maybe_unused]] static bool SortedScalar =
      (llvm::sort(ScalarInts, CmpOpcode), true);
  [[maybe_unused]] static bool SortedHvx =
      (llvm::sort(HvxInts, CmpOpcode), true);

  auto [BS, ES] = std::make_pair(std::begin(ScalarInts), std::end(ScalarInts));
  auto [BH, EH] = std::make_pair(std::begin(HvxInts), std::end(HvxInts));

  auto FoundScalar = std::lower_bound(BS, ES, Scalar{Opc, 0}, CmpOpcode);
  if (FoundScalar != ES && FoundScalar->Opcode == Opc)
    return FoundScalar->IntId;

  auto FoundHvx = std::lower_bound(BH, EH, Hvx{Opc, 0, 0}, CmpOpcode);
  if (FoundHvx != EH && FoundHvx->Opcode == Opc) {
    unsigned HwLen = getVectorLength();
    if (HwLen == 64)
      return FoundHvx->Int64Id;
    if (HwLen == 128)
      return FoundHvx->Int128Id;
  }

  std::string error = "Invalid opcode (" + std::to_string(Opc) + ")";
  llvm_unreachable(error.c_str());
  return 0;
}
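
// Usage sketch (the opcode/intrinsic pairing below is an assumption about
// the generated tables, not taken from this file):
//   Intrinsic::ID ID = ST.getIntrinsicId(Hexagon::A2_add);
//   // For an HVX opcode, the Int64Id or Int128Id variant is returned,
//   // depending on getVectorLength().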