1//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86InstrInfo.h"
14#include "X86.h"
15#include "X86InstrBuilder.h"
16#include "X86InstrFoldTables.h"
18#include "X86Subtarget.h"
19#include "X86TargetMachine.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Sequence.h"
36#include "llvm/IR/Function.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Module.h"
39#include "llvm/MC/MCAsmInfo.h"
40#include "llvm/MC/MCExpr.h"
41#include "llvm/MC/MCInst.h"
43#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "x86-instr-info"
52
53#define GET_INSTRINFO_CTOR_DTOR
54#include "X86GenInstrInfo.inc"
55
56static cl::opt<bool>
57 NoFusing("disable-spill-fusing",
58 cl::desc("Disable fusing of spill code into instructions"),
59 cl::Hidden);
60static cl::opt<bool>
61 PrintFailedFusing("print-failed-fuse-candidates",
62 cl::desc("Print instructions that the allocator wants to"
63 " fuse, but the X86 backend currently can't"),
64 cl::Hidden);
65static cl::opt<bool>
66 ReMatPICStubLoad("remat-pic-stub-load",
67 cl::desc("Re-materialize load from stub in PIC mode"),
68 cl::init(false), cl::Hidden);
69static cl::opt<unsigned>
70 PartialRegUpdateClearance("partial-reg-update-clearance",
71 cl::desc("Clearance between two register writes "
72 "for inserting XOR to avoid partial "
73 "register update"),
74 cl::init(64), cl::Hidden);
75static cl::opt<unsigned> UndefRegClearance(
76 "undef-reg-clearance",
77 cl::desc("How many idle instructions we would like before "
78 "certain undef register reads"),
79 cl::init(128), cl::Hidden);
80
81// Pin the vtable to this file.
82void X86InstrInfo::anchor() {}
83
84X86InstrInfo::X86InstrInfo(const X86Subtarget &STI)
85 : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
86 : X86::ADJCALLSTACKDOWN32),
87 (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
88 : X86::ADJCALLSTACKUP32),
89 X86::CATCHRET, (STI.is64Bit() ? X86::RET64 : X86::RET32)),
90 Subtarget(STI), RI(STI.getTargetTriple()) {}
91
92const TargetRegisterClass *
93X86InstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
94 const TargetRegisterInfo *TRI,
95 const MachineFunction &MF) const {
96 auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum, TRI, MF);
97 // If the target does not have EGPR, then r16-r31 will be reserved for all
98 // instructions.
99 if (!RC || !Subtarget.hasEGPR())
100 return RC;
101
102 if (X86II::canUseApxExtendedReg(MCID))
103 return RC;
104
105 switch (RC->getID()) {
106 default:
107 return RC;
108 case X86::GR8RegClassID:
109 return &X86::GR8_NOREX2RegClass;
110 case X86::GR16RegClassID:
111 return &X86::GR16_NOREX2RegClass;
112 case X86::GR32RegClassID:
113 return &X86::GR32_NOREX2RegClass;
114 case X86::GR64RegClassID:
115 return &X86::GR64_NOREX2RegClass;
116 case X86::GR32_NOSPRegClassID:
117 return &X86::GR32_NOREX2_NOSPRegClass;
118 case X86::GR64_NOSPRegClassID:
119 return &X86::GR64_NOREX2_NOSPRegClass;
120 }
121}
122
123bool X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
124 Register &SrcReg, Register &DstReg,
125 unsigned &SubIdx) const {
126 switch (MI.getOpcode()) {
127 default:
128 break;
129 case X86::MOVSX16rr8:
130 case X86::MOVZX16rr8:
131 case X86::MOVSX32rr8:
132 case X86::MOVZX32rr8:
133 case X86::MOVSX64rr8:
134 if (!Subtarget.is64Bit())
135 // It's not always legal to reference the low 8 bits of the larger
136 // register in 32-bit mode.
137 return false;
138 [[fallthrough]];
139 case X86::MOVSX32rr16:
140 case X86::MOVZX32rr16:
141 case X86::MOVSX64rr16:
142 case X86::MOVSX64rr32: {
143 if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
144 // Be conservative.
145 return false;
146 SrcReg = MI.getOperand(1).getReg();
147 DstReg = MI.getOperand(0).getReg();
148 switch (MI.getOpcode()) {
149 default:
150 llvm_unreachable("Unreachable!");
151 case X86::MOVSX16rr8:
152 case X86::MOVZX16rr8:
153 case X86::MOVSX32rr8:
154 case X86::MOVZX32rr8:
155 case X86::MOVSX64rr8:
156 SubIdx = X86::sub_8bit;
157 break;
158 case X86::MOVSX32rr16:
159 case X86::MOVZX32rr16:
160 case X86::MOVSX64rr16:
161 SubIdx = X86::sub_16bit;
162 break;
163 case X86::MOVSX64rr32:
164 SubIdx = X86::sub_32bit;
165 break;
166 }
167 return true;
168 }
169 }
170 return false;
171}
172
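/// Return true if MI is "data invariant": its execution (timing, memory/cache
/// footprint, control flow) does not depend on the values of its register
/// operands. Instructions are still considered data invariant if they set
/// EFLAGS. Hardening passes such as speculative load hardening rely on this
/// when deciding which instructions may safely consume possibly-poisoned
/// values.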
173bool X86InstrInfo::isDataInvariant(MachineInstr &MI) {
174 if (MI.mayLoad() || MI.mayStore())
175 return false;
176
177 // Some target-independent operations that trivially lower to data-invariant
178 // instructions.
179 if (MI.isCopyLike() || MI.isInsertSubreg())
180 return true;
181
182 unsigned Opcode = MI.getOpcode();
183 using namespace X86;
184 // On x86 it is believed that imul is constant time w.r.t. the loaded data.
185 // However, they set flags and are perhaps the most surprisingly constant
186 // time operations so we call them out here separately.
187 if (isIMUL(Opcode))
188 return true;
189 // Bit scanning and counting instructions are somewhat surprising: they
190 // scan across bits and do other fairly complex operations like popcnt,
191 // but are believed to be constant time on x86.
192 // However, these set flags.
193 if (isBSF(Opcode) || isBSR(Opcode) || isLZCNT(Opcode) || isPOPCNT(Opcode) ||
194 isTZCNT(Opcode))
195 return true;
196 // Bit manipulation instructions are effectively combinations of basic
197 // arithmetic ops, and should still execute in constant time. These also
198 // set flags.
199 if (isBLCFILL(Opcode) || isBLCI(Opcode) || isBLCIC(Opcode) ||
200 isBLCMSK(Opcode) || isBLCS(Opcode) || isBLSFILL(Opcode) ||
201 isBLSI(Opcode) || isBLSIC(Opcode) || isBLSMSK(Opcode) || isBLSR(Opcode) ||
202 isTZMSK(Opcode))
203 return true;
204 // Bit extracting and clearing instructions should execute in constant time,
205 // and set flags.
206 if (isBEXTR(Opcode) || isBZHI(Opcode))
207 return true;
208 // Shift and rotate.
209 if (isROL(Opcode) || isROR(Opcode) || isSAR(Opcode) || isSHL(Opcode) ||
210 isSHR(Opcode) || isSHLD(Opcode) || isSHRD(Opcode))
211 return true;
212 // Basic arithmetic is constant time on the input but does set flags.
213 if (isADC(Opcode) || isADD(Opcode) || isAND(Opcode) || isOR(Opcode) ||
214 isSBB(Opcode) || isSUB(Opcode) || isXOR(Opcode))
215 return true;
216 // Arithmetic with just 32-bit and 64-bit variants and no immediates.
217 if (isANDN(Opcode))
218 return true;
219 // Unary arithmetic operations.
220 if (isDEC(Opcode) || isINC(Opcode) || isNEG(Opcode))
221 return true;
222 // Unlike other arithmetic, NOT doesn't set EFLAGS.
223 if (isNOT(Opcode))
224 return true;
225 // Various move instructions used to zero or sign extend things. Note that we
226 // intentionally don't support the _NOREX variants as we can't handle that
227 // register constraint anyways.
228 if (isMOVSX(Opcode) || isMOVZX(Opcode) || isMOVSXD(Opcode) || isMOV(Opcode))
229 return true;
230 // Arithmetic instructions that are both constant time and don't set flags.
231 if (isRORX(Opcode) || isSARX(Opcode) || isSHLX(Opcode) || isSHRX(Opcode))
232 return true;
233 // LEA doesn't actually access memory, and its arithmetic is constant time.
234 if (isLEA(Opcode))
235 return true;
236 // By default, assume that the instruction is not data invariant.
237 return false;
238}
239
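/// Return true if the value loaded by this instruction cannot leak through its
/// execution: aside from performing the load itself, the instruction is data
/// invariant in the sense of isDataInvariant() above. Unknown opcodes are
/// conservatively assumed to leak the loaded value immediately.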
240bool X86InstrInfo::isDataInvariantLoad(MachineInstr &MI) {
241 switch (MI.getOpcode()) {
242 default:
243 // By default, assume that the load will immediately leak.
244 return false;
245
246 // On x86 it is believed that imul is constant time w.r.t. the loaded data.
247 // However, they set flags and are perhaps the most surprisingly constant
248 // time operations so we call them out here separately.
249 case X86::IMUL16rm:
250 case X86::IMUL16rmi:
251 case X86::IMUL32rm:
252 case X86::IMUL32rmi:
253 case X86::IMUL64rm:
254 case X86::IMUL64rmi32:
255
256 // Bit scanning and counting instructions are somewhat surprising: they
257 // scan across bits and do other fairly complex operations like popcnt,
258 // but are believed to be constant time on x86.
259 // However, these set flags.
260 case X86::BSF16rm:
261 case X86::BSF32rm:
262 case X86::BSF64rm:
263 case X86::BSR16rm:
264 case X86::BSR32rm:
265 case X86::BSR64rm:
266 case X86::LZCNT16rm:
267 case X86::LZCNT32rm:
268 case X86::LZCNT64rm:
269 case X86::POPCNT16rm:
270 case X86::POPCNT32rm:
271 case X86::POPCNT64rm:
272 case X86::TZCNT16rm:
273 case X86::TZCNT32rm:
274 case X86::TZCNT64rm:
275
276 // Bit manipulation instructions are effectively combinations of basic
277 // arithmetic ops, and should still execute in constant time. These also
278 // set flags.
279 case X86::BLCFILL32rm:
280 case X86::BLCFILL64rm:
281 case X86::BLCI32rm:
282 case X86::BLCI64rm:
283 case X86::BLCIC32rm:
284 case X86::BLCIC64rm:
285 case X86::BLCMSK32rm:
286 case X86::BLCMSK64rm:
287 case X86::BLCS32rm:
288 case X86::BLCS64rm:
289 case X86::BLSFILL32rm:
290 case X86::BLSFILL64rm:
291 case X86::BLSI32rm:
292 case X86::BLSI64rm:
293 case X86::BLSIC32rm:
294 case X86::BLSIC64rm:
295 case X86::BLSMSK32rm:
296 case X86::BLSMSK64rm:
297 case X86::BLSR32rm:
298 case X86::BLSR64rm:
299 case X86::TZMSK32rm:
300 case X86::TZMSK64rm:
301
302 // Bit extracting and clearing instructions should execute in constant time,
303 // and set flags.
304 case X86::BEXTR32rm:
305 case X86::BEXTR64rm:
306 case X86::BEXTRI32mi:
307 case X86::BEXTRI64mi:
308 case X86::BZHI32rm:
309 case X86::BZHI64rm:
310
311 // Basic arithmetic is constant time on the input but does set flags.
312 case X86::ADC8rm:
313 case X86::ADC16rm:
314 case X86::ADC32rm:
315 case X86::ADC64rm:
316 case X86::ADD8rm:
317 case X86::ADD16rm:
318 case X86::ADD32rm:
319 case X86::ADD64rm:
320 case X86::AND8rm:
321 case X86::AND16rm:
322 case X86::AND32rm:
323 case X86::AND64rm:
324 case X86::ANDN32rm:
325 case X86::ANDN64rm:
326 case X86::OR8rm:
327 case X86::OR16rm:
328 case X86::OR32rm:
329 case X86::OR64rm:
330 case X86::SBB8rm:
331 case X86::SBB16rm:
332 case X86::SBB32rm:
333 case X86::SBB64rm:
334 case X86::SUB8rm:
335 case X86::SUB16rm:
336 case X86::SUB32rm:
337 case X86::SUB64rm:
338 case X86::XOR8rm:
339 case X86::XOR16rm:
340 case X86::XOR32rm:
341 case X86::XOR64rm:
342
343 // Integer multiply w/o affecting flags is still believed to be constant
344 // time on x86. Called out separately as this is among the most surprising
345 // instructions to exhibit that behavior.
346 case X86::MULX32rm:
347 case X86::MULX64rm:
348
349 // Arithmetic instructions that are both constant time and don't set flags.
350 case X86::RORX32mi:
351 case X86::RORX64mi:
352 case X86::SARX32rm:
353 case X86::SARX64rm:
354 case X86::SHLX32rm:
355 case X86::SHLX64rm:
356 case X86::SHRX32rm:
357 case X86::SHRX64rm:
358
359 // Conversions are believed to be constant time and don't set flags.
360 case X86::CVTTSD2SI64rm:
361 case X86::VCVTTSD2SI64rm:
362 case X86::VCVTTSD2SI64Zrm:
363 case X86::CVTTSD2SIrm:
364 case X86::VCVTTSD2SIrm:
365 case X86::VCVTTSD2SIZrm:
366 case X86::CVTTSS2SI64rm:
367 case X86::VCVTTSS2SI64rm:
368 case X86::VCVTTSS2SI64Zrm:
369 case X86::CVTTSS2SIrm:
370 case X86::VCVTTSS2SIrm:
371 case X86::VCVTTSS2SIZrm:
372 case X86::CVTSI2SDrm:
373 case X86::VCVTSI2SDrm:
374 case X86::VCVTSI2SDZrm:
375 case X86::CVTSI2SSrm:
376 case X86::VCVTSI2SSrm:
377 case X86::VCVTSI2SSZrm:
378 case X86::CVTSI642SDrm:
379 case X86::VCVTSI642SDrm:
380 case X86::VCVTSI642SDZrm:
381 case X86::CVTSI642SSrm:
382 case X86::VCVTSI642SSrm:
383 case X86::VCVTSI642SSZrm:
384 case X86::CVTSS2SDrm:
385 case X86::VCVTSS2SDrm:
386 case X86::VCVTSS2SDZrm:
387 case X86::CVTSD2SSrm:
388 case X86::VCVTSD2SSrm:
389 case X86::VCVTSD2SSZrm:
390 // AVX512 added unsigned integer conversions.
391 case X86::VCVTTSD2USI64Zrm:
392 case X86::VCVTTSD2USIZrm:
393 case X86::VCVTTSS2USI64Zrm:
394 case X86::VCVTTSS2USIZrm:
395 case X86::VCVTUSI2SDZrm:
396 case X86::VCVTUSI642SDZrm:
397 case X86::VCVTUSI2SSZrm:
398 case X86::VCVTUSI642SSZrm:
399
400 // Loads to register don't set flags.
401 case X86::MOV8rm:
402 case X86::MOV8rm_NOREX:
403 case X86::MOV16rm:
404 case X86::MOV32rm:
405 case X86::MOV64rm:
406 case X86::MOVSX16rm8:
407 case X86::MOVSX32rm16:
408 case X86::MOVSX32rm8:
409 case X86::MOVSX32rm8_NOREX:
410 case X86::MOVSX64rm16:
411 case X86::MOVSX64rm32:
412 case X86::MOVSX64rm8:
413 case X86::MOVZX16rm8:
414 case X86::MOVZX32rm16:
415 case X86::MOVZX32rm8:
416 case X86::MOVZX32rm8_NOREX:
417 case X86::MOVZX64rm16:
418 case X86::MOVZX64rm8:
419 return true;
420 }
421}
422
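/// Return the net stack pointer adjustment (in bytes) made by MI: frame
/// setup/destroy pseudos use the frame size rounded up to the stack alignment,
/// calls are resolved by scanning forward for the matching call-frame-destroy
/// pseudo, and PUSHes that appear in call sequences count their slot size.
/// Anything else adjusts the stack by 0.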
423int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
424 const MachineFunction *MF = MI.getParent()->getParent();
425 const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
426
427 if (isFrameInstr(MI)) {
428 int SPAdj = alignTo(getFrameSize(MI), TFI->getStackAlign());
429 SPAdj -= getFrameAdjustment(MI);
430 if (!isFrameSetup(MI))
431 SPAdj = -SPAdj;
432 return SPAdj;
433 }
434
435 // To know whether a call adjusts the stack, we need information
436 // that is bound to the following ADJCALLSTACKUP pseudo.
437 // Look for the next ADJCALLSTACKUP that follows the call.
438 if (MI.isCall()) {
439 const MachineBasicBlock *MBB = MI.getParent();
440 auto I = ++MachineBasicBlock::const_iterator(MI);
441 for (auto E = MBB->end(); I != E; ++I) {
442 if (I->getOpcode() == getCallFrameDestroyOpcode() || I->isCall())
443 break;
444 }
445
446 // If we could not find a frame destroy opcode, then it has already
447 // been simplified, so we don't care.
448 if (I->getOpcode() != getCallFrameDestroyOpcode())
449 return 0;
450
451 return -(I->getOperand(1).getImm());
452 }
453
454 // Currently handle only PUSHes we can reasonably expect to see
455 // in call sequences
456 switch (MI.getOpcode()) {
457 default:
458 return 0;
459 case X86::PUSH32r:
460 case X86::PUSH32rmm:
461 case X86::PUSH32rmr:
462 case X86::PUSH32i:
463 return 4;
464 case X86::PUSH64r:
465 case X86::PUSH64rmm:
466 case X86::PUSH64rmr:
467 case X86::PUSH64i32:
468 return 8;
469 }
470}
471
472/// Return true and the FrameIndex if the specified
473/// operand and following operands form a reference to the stack frame.
474bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op,
475 int &FrameIndex) const {
476 if (MI.getOperand(Op + X86::AddrBaseReg).isFI() &&
477 MI.getOperand(Op + X86::AddrScaleAmt).isImm() &&
478 MI.getOperand(Op + X86::AddrIndexReg).isReg() &&
479 MI.getOperand(Op + X86::AddrDisp).isImm() &&
480 MI.getOperand(Op + X86::AddrScaleAmt).getImm() == 1 &&
481 MI.getOperand(Op + X86::AddrIndexReg).getReg() == 0 &&
482 MI.getOperand(Op + X86::AddrDisp).getImm() == 0) {
483 FrameIndex = MI.getOperand(Op + X86::AddrBaseReg).getIndex();
484 return true;
485 }
486 return false;
487}
488
489static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
490 switch (Opcode) {
491 default:
492 return false;
493 case X86::MOV8rm:
494 case X86::KMOVBkm:
495 case X86::KMOVBkm_EVEX:
496 MemBytes = 1;
497 return true;
498 case X86::MOV16rm:
499 case X86::KMOVWkm:
500 case X86::KMOVWkm_EVEX:
501 case X86::VMOVSHZrm:
502 case X86::VMOVSHZrm_alt:
503 MemBytes = 2;
504 return true;
505 case X86::MOV32rm:
506 case X86::MOVSSrm:
507 case X86::MOVSSrm_alt:
508 case X86::VMOVSSrm:
509 case X86::VMOVSSrm_alt:
510 case X86::VMOVSSZrm:
511 case X86::VMOVSSZrm_alt:
512 case X86::KMOVDkm:
513 case X86::KMOVDkm_EVEX:
514 MemBytes = 4;
515 return true;
516 case X86::MOV64rm:
517 case X86::LD_Fp64m:
518 case X86::MOVSDrm:
519 case X86::MOVSDrm_alt:
520 case X86::VMOVSDrm:
521 case X86::VMOVSDrm_alt:
522 case X86::VMOVSDZrm:
523 case X86::VMOVSDZrm_alt:
524 case X86::MMX_MOVD64rm:
525 case X86::MMX_MOVQ64rm:
526 case X86::KMOVQkm:
527 case X86::KMOVQkm_EVEX:
528 MemBytes = 8;
529 return true;
530 case X86::MOVAPSrm:
531 case X86::MOVUPSrm:
532 case X86::MOVAPDrm:
533 case X86::MOVUPDrm:
534 case X86::MOVDQArm:
535 case X86::MOVDQUrm:
536 case X86::VMOVAPSrm:
537 case X86::VMOVUPSrm:
538 case X86::VMOVAPDrm:
539 case X86::VMOVUPDrm:
540 case X86::VMOVDQArm:
541 case X86::VMOVDQUrm:
542 case X86::VMOVAPSZ128rm:
543 case X86::VMOVUPSZ128rm:
544 case X86::VMOVAPSZ128rm_NOVLX:
545 case X86::VMOVUPSZ128rm_NOVLX:
546 case X86::VMOVAPDZ128rm:
547 case X86::VMOVUPDZ128rm:
548 case X86::VMOVDQU8Z128rm:
549 case X86::VMOVDQU16Z128rm:
550 case X86::VMOVDQA32Z128rm:
551 case X86::VMOVDQU32Z128rm:
552 case X86::VMOVDQA64Z128rm:
553 case X86::VMOVDQU64Z128rm:
554 MemBytes = 16;
555 return true;
556 case X86::VMOVAPSYrm:
557 case X86::VMOVUPSYrm:
558 case X86::VMOVAPDYrm:
559 case X86::VMOVUPDYrm:
560 case X86::VMOVDQAYrm:
561 case X86::VMOVDQUYrm:
562 case X86::VMOVAPSZ256rm:
563 case X86::VMOVUPSZ256rm:
564 case X86::VMOVAPSZ256rm_NOVLX:
565 case X86::VMOVUPSZ256rm_NOVLX:
566 case X86::VMOVAPDZ256rm:
567 case X86::VMOVUPDZ256rm:
568 case X86::VMOVDQU8Z256rm:
569 case X86::VMOVDQU16Z256rm:
570 case X86::VMOVDQA32Z256rm:
571 case X86::VMOVDQU32Z256rm:
572 case X86::VMOVDQA64Z256rm:
573 case X86::VMOVDQU64Z256rm:
574 MemBytes = 32;
575 return true;
576 case X86::VMOVAPSZrm:
577 case X86::VMOVUPSZrm:
578 case X86::VMOVAPDZrm:
579 case X86::VMOVUPDZrm:
580 case X86::VMOVDQU8Zrm:
581 case X86::VMOVDQU16Zrm:
582 case X86::VMOVDQA32Zrm:
583 case X86::VMOVDQU32Zrm:
584 case X86::VMOVDQA64Zrm:
585 case X86::VMOVDQU64Zrm:
586 MemBytes = 64;
587 return true;
588 }
589}
590
591static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
592 switch (Opcode) {
593 default:
594 return false;
595 case X86::MOV8mr:
596 case X86::KMOVBmk:
597 case X86::KMOVBmk_EVEX:
598 MemBytes = 1;
599 return true;
600 case X86::MOV16mr:
601 case X86::KMOVWmk:
602 case X86::KMOVWmk_EVEX:
603 case X86::VMOVSHZmr:
604 MemBytes = 2;
605 return true;
606 case X86::MOV32mr:
607 case X86::MOVSSmr:
608 case X86::VMOVSSmr:
609 case X86::VMOVSSZmr:
610 case X86::KMOVDmk:
611 case X86::KMOVDmk_EVEX:
612 MemBytes = 4;
613 return true;
614 case X86::MOV64mr:
615 case X86::ST_FpP64m:
616 case X86::MOVSDmr:
617 case X86::VMOVSDmr:
618 case X86::VMOVSDZmr:
619 case X86::MMX_MOVD64mr:
620 case X86::MMX_MOVQ64mr:
621 case X86::MMX_MOVNTQmr:
622 case X86::KMOVQmk:
623 case X86::KMOVQmk_EVEX:
624 MemBytes = 8;
625 return true;
626 case X86::MOVAPSmr:
627 case X86::MOVUPSmr:
628 case X86::MOVAPDmr:
629 case X86::MOVUPDmr:
630 case X86::MOVDQAmr:
631 case X86::MOVDQUmr:
632 case X86::VMOVAPSmr:
633 case X86::VMOVUPSmr:
634 case X86::VMOVAPDmr:
635 case X86::VMOVUPDmr:
636 case X86::VMOVDQAmr:
637 case X86::VMOVDQUmr:
638 case X86::VMOVUPSZ128mr:
639 case X86::VMOVAPSZ128mr:
640 case X86::VMOVUPSZ128mr_NOVLX:
641 case X86::VMOVAPSZ128mr_NOVLX:
642 case X86::VMOVUPDZ128mr:
643 case X86::VMOVAPDZ128mr:
644 case X86::VMOVDQA32Z128mr:
645 case X86::VMOVDQU32Z128mr:
646 case X86::VMOVDQA64Z128mr:
647 case X86::VMOVDQU64Z128mr:
648 case X86::VMOVDQU8Z128mr:
649 case X86::VMOVDQU16Z128mr:
650 MemBytes = 16;
651 return true;
652 case X86::VMOVUPSYmr:
653 case X86::VMOVAPSYmr:
654 case X86::VMOVUPDYmr:
655 case X86::VMOVAPDYmr:
656 case X86::VMOVDQUYmr:
657 case X86::VMOVDQAYmr:
658 case X86::VMOVUPSZ256mr:
659 case X86::VMOVAPSZ256mr:
660 case X86::VMOVUPSZ256mr_NOVLX:
661 case X86::VMOVAPSZ256mr_NOVLX:
662 case X86::VMOVUPDZ256mr:
663 case X86::VMOVAPDZ256mr:
664 case X86::VMOVDQU8Z256mr:
665 case X86::VMOVDQU16Z256mr:
666 case X86::VMOVDQA32Z256mr:
667 case X86::VMOVDQU32Z256mr:
668 case X86::VMOVDQA64Z256mr:
669 case X86::VMOVDQU64Z256mr:
670 MemBytes = 32;
671 return true;
672 case X86::VMOVUPSZmr:
673 case X86::VMOVAPSZmr:
674 case X86::VMOVUPDZmr:
675 case X86::VMOVAPDZmr:
676 case X86::VMOVDQU8Zmr:
677 case X86::VMOVDQU16Zmr:
678 case X86::VMOVDQA32Zmr:
679 case X86::VMOVDQU32Zmr:
680 case X86::VMOVDQA64Zmr:
681 case X86::VMOVDQU64Zmr:
682 MemBytes = 64;
683 return true;
684 }
685 return false;
686}
687
688Register X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
689 int &FrameIndex) const {
690 unsigned Dummy;
691 return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
692}
693
694Register X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
695 int &FrameIndex,
696 unsigned &MemBytes) const {
697 if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
698 if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
699 return MI.getOperand(0).getReg();
700 return 0;
701}
702
703Register X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
704 int &FrameIndex) const {
705 unsigned Dummy;
706 if (isFrameLoadOpcode(MI.getOpcode(), Dummy)) {
707 unsigned Reg;
708 if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
709 return Reg;
710 // Check for post-frame index elimination operations
711 SmallVector<const MachineMemOperand *, 1> Accesses;
712 if (hasLoadFromStackSlot(MI, Accesses)) {
713 FrameIndex =
714 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
715 ->getFrameIndex();
716 return MI.getOperand(0).getReg();
717 }
718 }
719 return 0;
720}
721
722Register X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
723 int &FrameIndex) const {
724 unsigned Dummy;
725 return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
726}
727
728Register X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
729 int &FrameIndex,
730 unsigned &MemBytes) const {
731 if (isFrameStoreOpcode(MI.getOpcode(), MemBytes))
732 if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
733 isFrameOperand(MI, 0, FrameIndex))
734 return MI.getOperand(X86::AddrNumOperands).getReg();
735 return 0;
736}
737
738Register X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
739 int &FrameIndex) const {
740 unsigned Dummy;
741 if (isFrameStoreOpcode(MI.getOpcode(), Dummy)) {
742 unsigned Reg;
743 if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
744 return Reg;
745 // Check for post-frame index elimination operations
746 SmallVector<const MachineMemOperand *, 1> Accesses;
747 if (hasStoreToStackSlot(MI, Accesses)) {
748 FrameIndex =
749 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
750 ->getFrameIndex();
751 return MI.getOperand(X86::AddrNumOperands).getReg();
752 }
753 }
754 return 0;
755}
756
757/// Return true if the register is a PIC base, i.e. defined by X86::MOVPC32r.
758static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI) {
759 // Don't waste compile time scanning use-def chains of physregs.
760 if (!BaseReg.isVirtual())
761 return false;
762 bool isPICBase = false;
763 for (const MachineInstr &DefMI : MRI.def_instructions(BaseReg)) {
764 if (DefMI.getOpcode() != X86::MOVPC32r)
765 return false;
766 assert(!isPICBase && "More than one PIC base?");
767 isPICBase = true;
768 }
769 return isPICBase;
770}
771
772bool X86InstrInfo::isReallyTriviallyReMaterializable(
773 const MachineInstr &MI) const {
774 switch (MI.getOpcode()) {
775 default:
776 // This function should only be called for opcodes with the ReMaterializable
777 // flag set.
778 llvm_unreachable("Unknown rematerializable operation!");
779 break;
780 case X86::IMPLICIT_DEF:
781 // Defer to generic logic.
782 break;
783 case X86::LOAD_STACK_GUARD:
784 case X86::LD_Fp032:
785 case X86::LD_Fp064:
786 case X86::LD_Fp080:
787 case X86::LD_Fp132:
788 case X86::LD_Fp164:
789 case X86::LD_Fp180:
790 case X86::AVX1_SETALLONES:
791 case X86::AVX2_SETALLONES:
792 case X86::AVX512_128_SET0:
793 case X86::AVX512_256_SET0:
794 case X86::AVX512_512_SET0:
795 case X86::AVX512_512_SETALLONES:
796 case X86::AVX512_FsFLD0SD:
797 case X86::AVX512_FsFLD0SH:
798 case X86::AVX512_FsFLD0SS:
799 case X86::AVX512_FsFLD0F128:
800 case X86::AVX_SET0:
801 case X86::FsFLD0SD:
802 case X86::FsFLD0SS:
803 case X86::FsFLD0SH:
804 case X86::FsFLD0F128:
805 case X86::KSET0D:
806 case X86::KSET0Q:
807 case X86::KSET0W:
808 case X86::KSET1D:
809 case X86::KSET1Q:
810 case X86::KSET1W:
811 case X86::MMX_SET0:
812 case X86::MOV32ImmSExti8:
813 case X86::MOV32r0:
814 case X86::MOV32r1:
815 case X86::MOV32r_1:
816 case X86::MOV32ri64:
817 case X86::MOV64ImmSExti8:
818 case X86::V_SET0:
819 case X86::V_SETALLONES:
820 case X86::MOV16ri:
821 case X86::MOV32ri:
822 case X86::MOV64ri:
823 case X86::MOV64ri32:
824 case X86::MOV8ri:
825 case X86::PTILEZEROV:
826 return true;
827
828 case X86::MOV8rm:
829 case X86::MOV8rm_NOREX:
830 case X86::MOV16rm:
831 case X86::MOV32rm:
832 case X86::MOV64rm:
833 case X86::MOVSSrm:
834 case X86::MOVSSrm_alt:
835 case X86::MOVSDrm:
836 case X86::MOVSDrm_alt:
837 case X86::MOVAPSrm:
838 case X86::MOVUPSrm:
839 case X86::MOVAPDrm:
840 case X86::MOVUPDrm:
841 case X86::MOVDQArm:
842 case X86::MOVDQUrm:
843 case X86::VMOVSSrm:
844 case X86::VMOVSSrm_alt:
845 case X86::VMOVSDrm:
846 case X86::VMOVSDrm_alt:
847 case X86::VMOVAPSrm:
848 case X86::VMOVUPSrm:
849 case X86::VMOVAPDrm:
850 case X86::VMOVUPDrm:
851 case X86::VMOVDQArm:
852 case X86::VMOVDQUrm:
853 case X86::VMOVAPSYrm:
854 case X86::VMOVUPSYrm:
855 case X86::VMOVAPDYrm:
856 case X86::VMOVUPDYrm:
857 case X86::VMOVDQAYrm:
858 case X86::VMOVDQUYrm:
859 case X86::MMX_MOVD64rm:
860 case X86::MMX_MOVQ64rm:
861 case X86::VBROADCASTSSrm:
862 case X86::VBROADCASTSSYrm:
863 case X86::VBROADCASTSDYrm:
864 // AVX-512
865 case X86::VPBROADCASTBZ128rm:
866 case X86::VPBROADCASTBZ256rm:
867 case X86::VPBROADCASTBZrm:
868 case X86::VBROADCASTF32X2Z256rm:
869 case X86::VBROADCASTF32X2Zrm:
870 case X86::VBROADCASTI32X2Z128rm:
871 case X86::VBROADCASTI32X2Z256rm:
872 case X86::VBROADCASTI32X2Zrm:
873 case X86::VPBROADCASTWZ128rm:
874 case X86::VPBROADCASTWZ256rm:
875 case X86::VPBROADCASTWZrm:
876 case X86::VPBROADCASTDZ128rm:
877 case X86::VPBROADCASTDZ256rm:
878 case X86::VPBROADCASTDZrm:
879 case X86::VBROADCASTSSZ128rm:
880 case X86::VBROADCASTSSZ256rm:
881 case X86::VBROADCASTSSZrm:
882 case X86::VPBROADCASTQZ128rm:
883 case X86::VPBROADCASTQZ256rm:
884 case X86::VPBROADCASTQZrm:
885 case X86::VBROADCASTSDZ256rm:
886 case X86::VBROADCASTSDZrm:
887 case X86::VMOVSSZrm:
888 case X86::VMOVSSZrm_alt:
889 case X86::VMOVSDZrm:
890 case X86::VMOVSDZrm_alt:
891 case X86::VMOVSHZrm:
892 case X86::VMOVSHZrm_alt:
893 case X86::VMOVAPDZ128rm:
894 case X86::VMOVAPDZ256rm:
895 case X86::VMOVAPDZrm:
896 case X86::VMOVAPSZ128rm:
897 case X86::VMOVAPSZ256rm:
898 case X86::VMOVAPSZ128rm_NOVLX:
899 case X86::VMOVAPSZ256rm_NOVLX:
900 case X86::VMOVAPSZrm:
901 case X86::VMOVDQA32Z128rm:
902 case X86::VMOVDQA32Z256rm:
903 case X86::VMOVDQA32Zrm:
904 case X86::VMOVDQA64Z128rm:
905 case X86::VMOVDQA64Z256rm:
906 case X86::VMOVDQA64Zrm:
907 case X86::VMOVDQU16Z128rm:
908 case X86::VMOVDQU16Z256rm:
909 case X86::VMOVDQU16Zrm:
910 case X86::VMOVDQU32Z128rm:
911 case X86::VMOVDQU32Z256rm:
912 case X86::VMOVDQU32Zrm:
913 case X86::VMOVDQU64Z128rm:
914 case X86::VMOVDQU64Z256rm:
915 case X86::VMOVDQU64Zrm:
916 case X86::VMOVDQU8Z128rm:
917 case X86::VMOVDQU8Z256rm:
918 case X86::VMOVDQU8Zrm:
919 case X86::VMOVUPDZ128rm:
920 case X86::VMOVUPDZ256rm:
921 case X86::VMOVUPDZrm:
922 case X86::VMOVUPSZ128rm:
923 case X86::VMOVUPSZ256rm:
924 case X86::VMOVUPSZ128rm_NOVLX:
925 case X86::VMOVUPSZ256rm_NOVLX:
926 case X86::VMOVUPSZrm: {
927 // Loads from constant pools are trivially rematerializable.
928 if (MI.getOperand(1 + X86::AddrBaseReg).isReg() &&
929 MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
930 MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
931 MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
932 MI.isDereferenceableInvariantLoad()) {
933 Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
934 if (BaseReg == 0 || BaseReg == X86::RIP)
935 return true;
936 // Allow re-materialization of PIC load.
937 if (!(!ReMatPICStubLoad && MI.getOperand(1 + X86::AddrDisp).isGlobal())) {
938 const MachineFunction &MF = *MI.getParent()->getParent();
939 const MachineRegisterInfo &MRI = MF.getRegInfo();
940 if (regIsPICBase(BaseReg, MRI))
941 return true;
942 }
943 }
944 break;
945 }
946
947 case X86::LEA32r:
948 case X86::LEA64r: {
949 if (MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
950 MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
951 MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
952 !MI.getOperand(1 + X86::AddrDisp).isReg()) {
953 // lea fi#, lea GV, etc. are all rematerializable.
954 if (!MI.getOperand(1 + X86::AddrBaseReg).isReg())
955 return true;
956 Register BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
957 if (BaseReg == 0)
958 return true;
959 // Allow re-materialization of lea PICBase + x.
960 const MachineFunction &MF = *MI.getParent()->getParent();
961 const MachineRegisterInfo &MRI = MF.getRegInfo();
962 if (regIsPICBase(BaseReg, MRI))
963 return true;
964 }
965 break;
966 }
967 }
968 return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
969}
970
971void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
972 MachineBasicBlock::iterator I,
973 Register DestReg, unsigned SubIdx,
974 const MachineInstr &Orig,
975 const TargetRegisterInfo &TRI) const {
976 bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI);
977 if (ClobbersEFLAGS && MBB.computeRegisterLiveness(&TRI, X86::EFLAGS, I) !=
978 MachineBasicBlock::LQR_Dead) {
979 // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
980 // effects.
981 int Value;
982 switch (Orig.getOpcode()) {
983 case X86::MOV32r0:
984 Value = 0;
985 break;
986 case X86::MOV32r1:
987 Value = 1;
988 break;
989 case X86::MOV32r_1:
990 Value = -1;
991 break;
992 default:
993 llvm_unreachable("Unexpected instruction!");
994 }
995
996 const DebugLoc &DL = Orig.getDebugLoc();
997 BuildMI(MBB, I, DL, get(X86::MOV32ri))
998 .add(Orig.getOperand(0))
999 .addImm(Value);
1000 } else {
1001 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
1002 MBB.insert(I, MI);
1003 }
1004
1005 MachineInstr &NewMI = *std::prev(I);
1006 NewMI.substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
1007}
1008
1009/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
1010static bool hasLiveCondCodeDef(MachineInstr &MI) {
1011 for (const MachineOperand &MO : MI.operands()) {
1012 if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS &&
1013 !MO.isDead()) {
1014 return true;
1015 }
1016 }
1017 return false;
1018}
1019
1020/// Return the shift count of a machine operand truncated the way the hardware truncates it: to 6 bits with a REX.W prefix and to 5 bits without.
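/// For example, an immediate count of 35 is honoured as 35 by a 64-bit (REX.W)
/// shift, while a 32-bit shift only performs 35 & 31 == 3.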
1021inline static unsigned getTruncatedShiftCount(const MachineInstr &MI,
1022 unsigned ShiftAmtOperandIdx) {
1023 // The shift count is six bits with the REX.W prefix and five bits without.
1024 unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
1025 unsigned Imm = MI.getOperand(ShiftAmtOperandIdx).getImm();
1026 return Imm & ShiftCountMask;
1027}
1028
1029/// Check whether the given shift count can be represented by a LEA
1030/// instruction's scale factor.
1031inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
1032 // Left shift instructions can be transformed into load-effective-address
1033 // instructions if we can encode them appropriately.
1034 // A LEA instruction utilizes a SIB byte to encode its scale factor.
1035 // The SIB.scale field is two bits wide which means that we can encode any
1036 // shift amount less than 4.
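 // For example, "shl $3, %reg" can be re-encoded as "lea (,%reg,8), %dst"
 // because scale 8 fits in SIB.scale, whereas a shift by 4 would need scale
 // 16, which is not encodable.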
1037 return ShAmt < 4 && ShAmt > 0;
1038}
1039
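/// Check whether CmpInstr (a TEST16rr/TEST64rr re-testing a value that
/// CmpValDefInstr produced via COPY/SUBREG_TO_REG) is made redundant by an
/// earlier AND in the same basic block that already set EFLAGS. On success,
/// *AndInstr points at that AND; NoSignFlag is set because SF may differ
/// between the AND and the TEST, and ClearsOverflowFlag records that the AND
/// clears OF.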
1040static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
1041 MachineInstr &CmpValDefInstr,
1042 const MachineRegisterInfo *MRI,
1043 MachineInstr **AndInstr,
1044 const TargetRegisterInfo *TRI,
1045 bool &NoSignFlag, bool &ClearsOverflowFlag) {
1046 if (!(CmpValDefInstr.getOpcode() == X86::SUBREG_TO_REG &&
1047 CmpInstr.getOpcode() == X86::TEST64rr) &&
1048 !(CmpValDefInstr.getOpcode() == X86::COPY &&
1049 CmpInstr.getOpcode() == X86::TEST16rr))
1050 return false;
1051
1052 // CmpInstr is a TEST16rr/TEST64rr instruction, and
1053 // `X86InstrInfo::analyzeCompare` guarantees that it's analyzable only if two
1054 // registers are identical.
1055 assert((CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
1056 "CmpInstr is an analyzable TEST16rr/TEST64rr, and "
1057 "`X86InstrInfo::analyzeCompare` requires two reg operands are the"
1058 "same.");
1059
1060 // Caller (`X86InstrInfo::optimizeCompareInstr`) guarantees that
1061 // `CmpValDefInstr` defines the value that's used by `CmpInstr`; in this case
1062 // if `CmpValDefInstr` sets the EFLAGS, it is likely that `CmpInstr` is
1063 // redundant.
1064 assert(
1065 (MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) &&
1066 "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG or TEST16rr "
1067 "is a user of COPY sub16bit.");
1068 MachineInstr *VregDefInstr = nullptr;
1069 if (CmpInstr.getOpcode() == X86::TEST16rr) {
1070 if (!CmpValDefInstr.getOperand(1).getReg().isVirtual())
1071 return false;
1072 VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(1).getReg());
1073 if (!VregDefInstr)
1074 return false;
1075 // We can only remove the TEST if it follows an AND32ri or AND64ri32 whose
1076 // immediate fits in 16 bits; other 32/64-bit ops would test higher bits
1077 // that TEST16rr does not look at.
1078 if (!((VregDefInstr->getOpcode() == X86::AND32ri ||
1079 VregDefInstr->getOpcode() == X86::AND64ri32) &&
1080 isUInt<16>(VregDefInstr->getOperand(2).getImm())))
1081 return false;
1082 }
1083
1084 if (CmpInstr.getOpcode() == X86::TEST64rr) {
1085 // As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is
1086 // typically 0.
1087 if (CmpValDefInstr.getOperand(1).getImm() != 0)
1088 return false;
1089
1090 // As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
1091 // sub_32bit or sub_xmm.
1092 if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
1093 return false;
1094
1095 VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
1096 }
1097
1098 assert(VregDefInstr && "Must have a definition (SSA)");
1099
1100 // Requires `CmpValDefInstr` and `VregDefInstr` are from the same MBB
1101 // to simplify the subsequent analysis.
1102 //
1103 // FIXME: If `VregDefInstr->getParent()` is the only predecessor of
1104 // `CmpValDefInstr.getParent()`, this could be handled.
1105 if (VregDefInstr->getParent() != CmpValDefInstr.getParent())
1106 return false;
1107
1108 if (X86::isAND(VregDefInstr->getOpcode())) {
1109 // Get a sequence of instructions like
1110 // %reg = and* ... // Set EFLAGS
1111 // ... // EFLAGS not changed
1112 // %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit
1113 // test64rr %extended_reg, %extended_reg, implicit-def $eflags
1114 // or
1115 // %reg = and32* ...
1116 // ... // EFLAGS not changed.
1117 // %src_reg = copy %reg.sub_16bit:gr32
1118 // test16rr %src_reg, %src_reg, implicit-def $eflags
1119 //
1120 // If subsequent readers use a subset of bits that don't change
1121 // after `and*` instructions, it's likely that the test64rr could
1122 // be optimized away.
1123 for (const MachineInstr &Instr :
1124 make_range(std::next(MachineBasicBlock::iterator(VregDefInstr)),
1125 MachineBasicBlock::iterator(CmpValDefInstr))) {
1126 // Bail out if an instruction between 'VregDefInstr' and
1127 // 'CmpValDefInstr' modifies EFLAGS.
1128 if (Instr.modifiesRegister(X86::EFLAGS, TRI))
1129 return false;
1130 }
1131
1132 *AndInstr = VregDefInstr;
1133
1134 // AND instruction will essentially update SF and clear OF, so
1135 // NoSignFlag should be false in the sense that SF is modified by `AND`.
1136 //
1137 // However, the implementation artificially sets `NoSignFlag` to true
1138 // to poison the SF bit; that is to say, if SF is looked at later, the
1139 // optimization (to erase TEST64rr) will be disabled.
1140 //
1141 // The reason to poison the SF bit is that its value could differ between
1142 // the `AND` and the `TEST`; the sign bit is not known after `AND`,
1143 // but is known to be 0 as a result of `TEST64rr`.
1144 //
1145 // FIXME: As opposed to poisoning the SF bit directly, consider peeking into
1146 // the AND instruction and using the static information to guide peephole
1147 // optimization if possible. For example, it's possible to fold a
1148 // conditional move into a copy if the relevant EFLAG bits could be deduced
1149 // from an immediate operand of the AND operation.
1150 //
1151 NoSignFlag = true;
1152 // ClearsOverflowFlag is true for AND operation (no surprise).
1153 ClearsOverflowFlag = true;
1154 return true;
1155 }
1156 return false;
1157}
1158
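/// Helper for turning two-address arithmetic into LEA: given the source
/// operand Src of MI, choose a register NewSrc that the LEA opcode Opc can
/// legally use (for LEA64_32r this may mean copying the 32-bit value into a
/// fresh 64-bit vreg), report whether that value is killed, and provide any
/// implicit operand that must be appended to the new instruction. Returns
/// false if no suitable register can be found.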
1159bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
1160 unsigned Opc, bool AllowSP, Register &NewSrc,
1161 bool &isKill, MachineOperand &ImplicitOp,
1162 LiveVariables *LV, LiveIntervals *LIS) const {
1163 MachineFunction &MF = *MI.getParent()->getParent();
1164 const TargetRegisterClass *RC;
1165 if (AllowSP) {
1166 RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
1167 } else {
1168 RC = Opc != X86::LEA32r ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
1169 }
1170 Register SrcReg = Src.getReg();
1171 isKill = MI.killsRegister(SrcReg, /*TRI=*/nullptr);
1172
1173 // For both LEA64 and LEA32 the register already has essentially the right
1174 // type (32-bit or 64-bit); we may just need to forbid SP.
1175 if (Opc != X86::LEA64_32r) {
1176 NewSrc = SrcReg;
1177 assert(!Src.isUndef() && "Undef op doesn't need optimization");
1178
1179 if (NewSrc.isVirtual() && !MF.getRegInfo().constrainRegClass(NewSrc, RC))
1180 return false;
1181
1182 return true;
1183 }
1184
1185 // This is for an LEA64_32r and incoming registers are 32-bit. One way or
1186 // another we need to add 64-bit registers to the final MI.
1187 if (SrcReg.isPhysical()) {
1188 ImplicitOp = Src;
1189 ImplicitOp.setImplicit();
1190
1191 NewSrc = getX86SubSuperRegister(SrcReg, 64);
1192 assert(NewSrc.isValid() && "Invalid Operand");
1193 assert(!Src.isUndef() && "Undef op doesn't need optimization");
1194 } else {
1195 // The virtual register is of the wrong class; we have to create a temporary
1196 // 64-bit vreg to feed into the LEA.
1197 NewSrc = MF.getRegInfo().createVirtualRegister(RC);
1198 MachineInstr *Copy =
1199 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
1200 .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
1201 .addReg(SrcReg, getKillRegState(isKill));
1202
1203 // Which is obviously going to be dead after we're done with it.
1204 isKill = true;
1205
1206 if (LV)
1207 LV->replaceKillInstruction(SrcReg, MI, *Copy);
1208
1209 if (LIS) {
1210 SlotIndex CopyIdx = LIS->InsertMachineInstrInMaps(*Copy);
1211 SlotIndex Idx = LIS->getInstructionIndex(MI);
1212 LiveInterval &LI = LIS->getInterval(SrcReg);
1213 LiveRange::Segment *S = LI.getSegmentContaining(Idx);
1214 if (S->end.getBaseIndex() == Idx)
1215 S->end = CopyIdx.getRegSlot();
1216 }
1217 }
1218
1219 // We've set all the parameters without issue.
1220 return true;
1221}
1222
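/// Lower 8/16-bit two-address ADD/INC/DEC/SHL into an LEA on 64-bit targets:
/// the narrow operands are placed into fresh wider virtual registers via
/// IMPLICIT_DEF + COPY, the arithmetic is done with LEA64_32r, and the low
/// bits are copied back into the original destination register.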
1223MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
1224 MachineInstr &MI,
1225 LiveVariables *LV,
1226 LiveIntervals *LIS,
1227 bool Is8BitOp) const {
1228 // We handle 8-bit adds and various 16-bit opcodes in the switch below.
1229 MachineBasicBlock &MBB = *MI.getParent();
1230 MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
1231 assert((Is8BitOp ||
1232 RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
1233 *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
1234 "Unexpected type for LEA transform");
1235
1236 // TODO: For a 32-bit target, we need to adjust the LEA variables with
1237 // something like this:
1238 // Opcode = X86::LEA32r;
1239 // InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
1240 // OutRegLEA =
1241 // Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass)
1242 // : RegInfo.createVirtualRegister(&X86::GR32RegClass);
1243 if (!Subtarget.is64Bit())
1244 return nullptr;
1245
1246 unsigned Opcode = X86::LEA64_32r;
1247 Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
1248 Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
1249 Register InRegLEA2;
1250
1251 // Build and insert into an implicit UNDEF value. This is OK because
1252 // we will be shifting and then extracting the lower 8/16-bits.
1253 // This has the potential to cause partial register stall. e.g.
1254 // movw (%rbp,%rcx,2), %dx
1255 // leal -65(%rdx), %esi
1256 // But testing has shown this *does* help performance in 64-bit mode (at
1257 // least on modern x86 machines).
1258 MachineBasicBlock::iterator MBBI = MI.getIterator();
1259 Register Dest = MI.getOperand(0).getReg();
1260 Register Src = MI.getOperand(1).getReg();
1261 Register Src2;
1262 bool IsDead = MI.getOperand(0).isDead();
1263 bool IsKill = MI.getOperand(1).isKill();
1264 unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
1265 assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
1266 MachineInstr *ImpDef =
1267 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
1268 MachineInstr *InsMI =
1269 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
1270 .addReg(InRegLEA, RegState::Define, SubReg)
1271 .addReg(Src, getKillRegState(IsKill));
1272 MachineInstr *ImpDef2 = nullptr;
1273 MachineInstr *InsMI2 = nullptr;
1274
1275 MachineInstrBuilder MIB =
1276 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
1277 switch (MIOpc) {
1278 default:
1279 llvm_unreachable("Unreachable!");
1280 case X86::SHL8ri:
1281 case X86::SHL16ri: {
1282 unsigned ShAmt = MI.getOperand(2).getImm();
1283 MIB.addReg(0)
1284 .addImm(1LL << ShAmt)
1285 .addReg(InRegLEA, RegState::Kill)
1286 .addImm(0)
1287 .addReg(0);
1288 break;
1289 }
1290 case X86::INC8r:
1291 case X86::INC16r:
1292 addRegOffset(MIB, InRegLEA, true, 1);
1293 break;
1294 case X86::DEC8r:
1295 case X86::DEC16r:
1296 addRegOffset(MIB, InRegLEA, true, -1);
1297 break;
1298 case X86::ADD8ri:
1299 case X86::ADD8ri_DB:
1300 case X86::ADD16ri:
1301 case X86::ADD16ri_DB:
1302 addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
1303 break;
1304 case X86::ADD8rr:
1305 case X86::ADD8rr_DB:
1306 case X86::ADD16rr:
1307 case X86::ADD16rr_DB: {
1308 Src2 = MI.getOperand(2).getReg();
1309 bool IsKill2 = MI.getOperand(2).isKill();
1310 assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
1311 if (Src == Src2) {
1312 // ADD8rr/ADD16rr killed %reg1028, %reg1028
1313 // just a single insert_subreg.
1314 addRegReg(MIB, InRegLEA, true, InRegLEA, false);
1315 } else {
1316 if (Subtarget.is64Bit())
1317 InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
1318 else
1319 InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
1320 // Build and insert into an implicit UNDEF value. This is OK because
1321 // we will be shifting and then extracting the lower 8/16-bits.
1322 ImpDef2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF),
1323 InRegLEA2);
1324 InsMI2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
1325 .addReg(InRegLEA2, RegState::Define, SubReg)
1326 .addReg(Src2, getKillRegState(IsKill2));
1327 addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
1328 }
1329 if (LV && IsKill2 && InsMI2)
1330 LV->replaceKillInstruction(Src2, MI, *InsMI2);
1331 break;
1332 }
1333 }
1334
1335 MachineInstr *NewMI = MIB;
1336 MachineInstr *ExtMI =
1337 BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
1338 .addReg(Dest, RegState::Define | getDeadRegState(IsDead))
1339 .addReg(OutRegLEA, RegState::Kill, SubReg);
1340
1341 if (LV) {
1342 // Update live variables.
1343 LV->getVarInfo(InRegLEA).Kills.push_back(NewMI);
1344 if (InRegLEA2)
1345 LV->getVarInfo(InRegLEA2).Kills.push_back(NewMI);
1346 LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI);
1347 if (IsKill)
1348 LV->replaceKillInstruction(Src, MI, *InsMI);
1349 if (IsDead)
1350 LV->replaceKillInstruction(Dest, MI, *ExtMI);
1351 }
1352
1353 if (LIS) {
1354 LIS->InsertMachineInstrInMaps(*ImpDef);
1355 SlotIndex InsIdx = LIS->InsertMachineInstrInMaps(*InsMI);
1356 if (ImpDef2)
1357 LIS->InsertMachineInstrInMaps(*ImpDef2);
1358 SlotIndex Ins2Idx;
1359 if (InsMI2)
1360 Ins2Idx = LIS->InsertMachineInstrInMaps(*InsMI2);
1361 SlotIndex NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
1362 SlotIndex ExtIdx = LIS->InsertMachineInstrInMaps(*ExtMI);
1363 LIS->getInterval(InRegLEA);
1364 LIS->getInterval(OutRegLEA);
1365 if (InRegLEA2)
1366 LIS->getInterval(InRegLEA2);
1367
1368 // Move the use of Src up to InsMI.
1369 LiveInterval &SrcLI = LIS->getInterval(Src);
1370 LiveRange::Segment *SrcSeg = SrcLI.getSegmentContaining(NewIdx);
1371 if (SrcSeg->end == NewIdx.getRegSlot())
1372 SrcSeg->end = InsIdx.getRegSlot();
1373
1374 if (InsMI2) {
1375 // Move the use of Src2 up to InsMI2.
1376 LiveInterval &Src2LI = LIS->getInterval(Src2);
1377 LiveRange::Segment *Src2Seg = Src2LI.getSegmentContaining(NewIdx);
1378 if (Src2Seg->end == NewIdx.getRegSlot())
1379 Src2Seg->end = Ins2Idx.getRegSlot();
1380 }
1381
1382 // Move the definition of Dest down to ExtMI.
1383 LiveInterval &DestLI = LIS->getInterval(Dest);
1384 LiveRange::Segment *DestSeg =
1385 DestLI.getSegmentContaining(NewIdx.getRegSlot());
1386 assert(DestSeg->start == NewIdx.getRegSlot() &&
1387 DestSeg->valno->def == NewIdx.getRegSlot());
1388 DestSeg->start = ExtIdx.getRegSlot();
1389 DestSeg->valno->def = ExtIdx.getRegSlot();
1390 }
1391
1392 return ExtMI;
1393}
1394
1395/// This method must be implemented by targets that
1396/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
1397/// may be able to convert a two-address instruction into a true
1398/// three-address instruction on demand. This allows the X86 target (for
1399/// example) to convert ADD and SHL instructions into LEA instructions if they
1400/// would require register copies due to two-addressness.
1401///
1402/// This method returns a null pointer if the transformation cannot be
1403/// performed, otherwise it returns the new instruction.
1404///
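/// Illustrative example: a two-address add such as
///   %dst = ADD32rr %dst(tied), %src
/// would normally force a copy when the first input must stay live; instead it
/// can be emitted as the three-address form
///   %dst = LEA64_32r %a, 1, %b, 0, $noreg
/// (with %a/%b the 64-bit super-registers of the inputs), leaving both inputs
/// unmodified.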
1405MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
1406 LiveVariables *LV,
1407 LiveIntervals *LIS) const {
1408 // The following opcodes also set the condition code register(s). Only
1409 // convert them to an equivalent LEA if the condition code register defs
1410 // are dead!
1411 if (hasLiveCondCodeDef(MI))
1412 return nullptr;
1413
1414 MachineFunction &MF = *MI.getParent()->getParent();
1415 // All instructions input are two-addr instructions. Get the known operands.
1416 const MachineOperand &Dest = MI.getOperand(0);
1417 const MachineOperand &Src = MI.getOperand(1);
1418
1419 // Ideally, operations with undef should be folded before we get here, but we
1420 // can't guarantee it. Bail out because optimizing undefs is a waste of time.
1421 // Without this, we have to forward undef state to new register operands to
1422 // avoid machine verifier errors.
1423 if (Src.isUndef())
1424 return nullptr;
1425 if (MI.getNumOperands() > 2)
1426 if (MI.getOperand(2).isReg() && MI.getOperand(2).isUndef())
1427 return nullptr;
1428
1429 MachineInstr *NewMI = nullptr;
1430 Register SrcReg, SrcReg2;
1431 bool Is64Bit = Subtarget.is64Bit();
1432
1433 bool Is8BitOp = false;
1434 unsigned NumRegOperands = 2;
1435 unsigned MIOpc = MI.getOpcode();
1436 switch (MIOpc) {
1437 default:
1438 llvm_unreachable("Unreachable!");
1439 case X86::SHL64ri: {
1440 assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
1441 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
1442 if (!isTruncatedShiftCountForLEA(ShAmt))
1443 return nullptr;
1444
1445 // LEA can't handle RSP.
1446 if (Src.getReg().isVirtual() && !MF.getRegInfo().constrainRegClass(
1447 Src.getReg(), &X86::GR64_NOSPRegClass))
1448 return nullptr;
1449
1450 NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
1451 .add(Dest)
1452 .addReg(0)
1453 .addImm(1LL << ShAmt)
1454 .add(Src)
1455 .addImm(0)
1456 .addReg(0);
1457 break;
1458 }
1459 case X86::SHL32ri: {
1460 assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
1461 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
1462 if (!isTruncatedShiftCountForLEA(ShAmt))
1463 return nullptr;
1464
1465 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1466
1467 // LEA can't handle ESP.
1468 bool isKill;
1469 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1470 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
1471 ImplicitOp, LV, LIS))
1472 return nullptr;
1473
1474 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1475 .add(Dest)
1476 .addReg(0)
1477 .addImm(1LL << ShAmt)
1478 .addReg(SrcReg, getKillRegState(isKill))
1479 .addImm(0)
1480 .addReg(0);
1481 if (ImplicitOp.getReg() != 0)
1482 MIB.add(ImplicitOp);
1483 NewMI = MIB;
1484
1485 // Add kills if classifyLEAReg created a new register.
1486 if (LV && SrcReg != Src.getReg())
1487 LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
1488 break;
1489 }
1490 case X86::SHL8ri:
1491 Is8BitOp = true;
1492 [[fallthrough]];
1493 case X86::SHL16ri: {
1494 assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
1495 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
1496 if (!isTruncatedShiftCountForLEA(ShAmt))
1497 return nullptr;
1498 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1499 }
1500 case X86::INC64r:
1501 case X86::INC32r: {
1502 assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
1503 unsigned Opc = MIOpc == X86::INC64r
1504 ? X86::LEA64r
1505 : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
1506 bool isKill;
1507 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1508 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
1509 ImplicitOp, LV, LIS))
1510 return nullptr;
1511
1512 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1513 .add(Dest)
1514 .addReg(SrcReg, getKillRegState(isKill));
1515 if (ImplicitOp.getReg() != 0)
1516 MIB.add(ImplicitOp);
1517
1518 NewMI = addOffset(MIB, 1);
1519
1520 // Add kills if classifyLEAReg created a new register.
1521 if (LV && SrcReg != Src.getReg())
1522 LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
1523 break;
1524 }
1525 case X86::DEC64r:
1526 case X86::DEC32r: {
1527 assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
1528 unsigned Opc = MIOpc == X86::DEC64r
1529 ? X86::LEA64r
1530 : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
1531
1532 bool isKill;
1533 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1534 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
1535 ImplicitOp, LV, LIS))
1536 return nullptr;
1537
1538 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1539 .add(Dest)
1540 .addReg(SrcReg, getKillRegState(isKill));
1541 if (ImplicitOp.getReg() != 0)
1542 MIB.add(ImplicitOp);
1543
1544 NewMI = addOffset(MIB, -1);
1545
1546 // Add kills if classifyLEAReg created a new register.
1547 if (LV && SrcReg != Src.getReg())
1548 LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
1549 break;
1550 }
1551 case X86::DEC8r:
1552 case X86::INC8r:
1553 Is8BitOp = true;
1554 [[fallthrough]];
1555 case X86::DEC16r:
1556 case X86::INC16r:
1557 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1558 case X86::ADD64rr:
1559 case X86::ADD64rr_DB:
1560 case X86::ADD32rr:
1561 case X86::ADD32rr_DB: {
1562 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1563 unsigned Opc;
1564 if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
1565 Opc = X86::LEA64r;
1566 else
1567 Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1568
1569 const MachineOperand &Src2 = MI.getOperand(2);
1570 bool isKill2;
1571 MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
1572 if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/false, SrcReg2, isKill2,
1573 ImplicitOp2, LV, LIS))
1574 return nullptr;
1575
1576 bool isKill;
1577 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1578 if (Src.getReg() == Src2.getReg()) {
1579 // Don't call classifyLEAReg a second time on the same register, in case
1580 // the first call inserted a COPY from Src2 and marked it as killed.
1581 isKill = isKill2;
1582 SrcReg = SrcReg2;
1583 } else {
1584 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
1585 ImplicitOp, LV, LIS))
1586 return nullptr;
1587 }
1588
1589 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
1590 if (ImplicitOp.getReg() != 0)
1591 MIB.add(ImplicitOp);
1592 if (ImplicitOp2.getReg() != 0)
1593 MIB.add(ImplicitOp2);
1594
1595 NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
1596
1597 // Add kills if classifyLEAReg created a new register.
1598 if (LV) {
1599 if (SrcReg2 != Src2.getReg())
1600 LV->getVarInfo(SrcReg2).Kills.push_back(NewMI);
1601 if (SrcReg != SrcReg2 && SrcReg != Src.getReg())
1602 LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
1603 }
1604 NumRegOperands = 3;
1605 break;
1606 }
1607 case X86::ADD8rr:
1608 case X86::ADD8rr_DB:
1609 Is8BitOp = true;
1610 [[fallthrough]];
1611 case X86::ADD16rr:
1612 case X86::ADD16rr_DB:
1613 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1614 case X86::ADD64ri32:
1615 case X86::ADD64ri32_DB:
1616 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1617 NewMI = addOffset(
1618 BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
1619 MI.getOperand(2));
1620 break;
1621 case X86::ADD32ri:
1622 case X86::ADD32ri_DB: {
1623 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1624 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1625
1626 bool isKill;
1627 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1628 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
1629 ImplicitOp, LV, LIS))
1630 return nullptr;
1631
1632 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1633 .add(Dest)
1634 .addReg(SrcReg, getKillRegState(isKill));
1635 if (ImplicitOp.getReg() != 0)
1636 MIB.add(ImplicitOp);
1637
1638 NewMI = addOffset(MIB, MI.getOperand(2));
1639
1640 // Add kills if classifyLEAReg created a new register.
1641 if (LV && SrcReg != Src.getReg())
1642 LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
1643 break;
1644 }
1645 case X86::ADD8ri:
1646 case X86::ADD8ri_DB:
1647 Is8BitOp = true;
1648 [[fallthrough]];
1649 case X86::ADD16ri:
1650 case X86::ADD16ri_DB:
1651 return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
1652 case X86::SUB8ri:
1653 case X86::SUB16ri:
1654 /// FIXME: Support these similarly to ADD8ri/ADD16ri*.
1655 return nullptr;
1656 case X86::SUB32ri: {
1657 if (!MI.getOperand(2).isImm())
1658 return nullptr;
1659 int64_t Imm = MI.getOperand(2).getImm();
1660 if (!isInt<32>(-Imm))
1661 return nullptr;
1662
1663 assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1664 unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1665
1666 bool isKill;
1667 MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1668 if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
1669 ImplicitOp, LV, LIS))
1670 return nullptr;
1671
1672 MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1673 .add(Dest)
1674 .addReg(SrcReg, getKillRegState(isKill));
1675 if (ImplicitOp.getReg() != 0)
1676 MIB.add(ImplicitOp);
1677
1678 NewMI = addOffset(MIB, -Imm);
1679
1680 // Add kills if classifyLEAReg created a new register.
1681 if (LV && SrcReg != Src.getReg())
1682 LV->getVarInfo(SrcReg).Kills.push_back(NewMI);
1683 break;
1684 }
1685
1686 case X86::SUB64ri32: {
1687 if (!MI.getOperand(2).isImm())
1688 return nullptr;
1689 int64_t Imm = MI.getOperand(2).getImm();
1690 if (!isInt<32>(-Imm))
1691 return nullptr;
1692
1693 assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!");
1694
1695 MachineInstrBuilder MIB =
1696 BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src);
1697 NewMI = addOffset(MIB, -Imm);
1698 break;
1699 }
1700
1701 case X86::VMOVDQU8Z128rmk:
1702 case X86::VMOVDQU8Z256rmk:
1703 case X86::VMOVDQU8Zrmk:
1704 case X86::VMOVDQU16Z128rmk:
1705 case X86::VMOVDQU16Z256rmk:
1706 case X86::VMOVDQU16Zrmk:
1707 case X86::VMOVDQU32Z128rmk:
1708 case X86::VMOVDQA32Z128rmk:
1709 case X86::VMOVDQU32Z256rmk:
1710 case X86::VMOVDQA32Z256rmk:
1711 case X86::VMOVDQU32Zrmk:
1712 case X86::VMOVDQA32Zrmk:
1713 case X86::VMOVDQU64Z128rmk:
1714 case X86::VMOVDQA64Z128rmk:
1715 case X86::VMOVDQU64Z256rmk:
1716 case X86::VMOVDQA64Z256rmk:
1717 case X86::VMOVDQU64Zrmk:
1718 case X86::VMOVDQA64Zrmk:
1719 case X86::VMOVUPDZ128rmk:
1720 case X86::VMOVAPDZ128rmk:
1721 case X86::VMOVUPDZ256rmk:
1722 case X86::VMOVAPDZ256rmk:
1723 case X86::VMOVUPDZrmk:
1724 case X86::VMOVAPDZrmk:
1725 case X86::VMOVUPSZ128rmk:
1726 case X86::VMOVAPSZ128rmk:
1727 case X86::VMOVUPSZ256rmk:
1728 case X86::VMOVAPSZ256rmk:
1729 case X86::VMOVUPSZrmk:
1730 case X86::VMOVAPSZrmk:
1731 case X86::VBROADCASTSDZ256rmk:
1732 case X86::VBROADCASTSDZrmk:
1733 case X86::VBROADCASTSSZ128rmk:
1734 case X86::VBROADCASTSSZ256rmk:
1735 case X86::VBROADCASTSSZrmk:
1736 case X86::VPBROADCASTDZ128rmk:
1737 case X86::VPBROADCASTDZ256rmk:
1738 case X86::VPBROADCASTDZrmk:
1739 case X86::VPBROADCASTQZ128rmk:
1740 case X86::VPBROADCASTQZ256rmk:
1741 case X86::VPBROADCASTQZrmk: {
1742 unsigned Opc;
1743 switch (MIOpc) {
1744 default:
1745 llvm_unreachable("Unreachable!");
1746 case X86::VMOVDQU8Z128rmk:
1747 Opc = X86::VPBLENDMBZ128rmk;
1748 break;
1749 case X86::VMOVDQU8Z256rmk:
1750 Opc = X86::VPBLENDMBZ256rmk;
1751 break;
1752 case X86::VMOVDQU8Zrmk:
1753 Opc = X86::VPBLENDMBZrmk;
1754 break;
1755 case X86::VMOVDQU16Z128rmk:
1756 Opc = X86::VPBLENDMWZ128rmk;
1757 break;
1758 case X86::VMOVDQU16Z256rmk:
1759 Opc = X86::VPBLENDMWZ256rmk;
1760 break;
1761 case X86::VMOVDQU16Zrmk:
1762 Opc = X86::VPBLENDMWZrmk;
1763 break;
1764 case X86::VMOVDQU32Z128rmk:
1765 Opc = X86::VPBLENDMDZ128rmk;
1766 break;
1767 case X86::VMOVDQU32Z256rmk:
1768 Opc = X86::VPBLENDMDZ256rmk;
1769 break;
1770 case X86::VMOVDQU32Zrmk:
1771 Opc = X86::VPBLENDMDZrmk;
1772 break;
1773 case X86::VMOVDQU64Z128rmk:
1774 Opc = X86::VPBLENDMQZ128rmk;
1775 break;
1776 case X86::VMOVDQU64Z256rmk:
1777 Opc = X86::VPBLENDMQZ256rmk;
1778 break;
1779 case X86::VMOVDQU64Zrmk:
1780 Opc = X86::VPBLENDMQZrmk;
1781 break;
1782 case X86::VMOVUPDZ128rmk:
1783 Opc = X86::VBLENDMPDZ128rmk;
1784 break;
1785 case X86::VMOVUPDZ256rmk:
1786 Opc = X86::VBLENDMPDZ256rmk;
1787 break;
1788 case X86::VMOVUPDZrmk:
1789 Opc = X86::VBLENDMPDZrmk;
1790 break;
1791 case X86::VMOVUPSZ128rmk:
1792 Opc = X86::VBLENDMPSZ128rmk;
1793 break;
1794 case X86::VMOVUPSZ256rmk:
1795 Opc = X86::VBLENDMPSZ256rmk;
1796 break;
1797 case X86::VMOVUPSZrmk:
1798 Opc = X86::VBLENDMPSZrmk;
1799 break;
1800 case X86::VMOVDQA32Z128rmk:
1801 Opc = X86::VPBLENDMDZ128rmk;
1802 break;
1803 case X86::VMOVDQA32Z256rmk:
1804 Opc = X86::VPBLENDMDZ256rmk;
1805 break;
1806 case X86::VMOVDQA32Zrmk:
1807 Opc = X86::VPBLENDMDZrmk;
1808 break;
1809 case X86::VMOVDQA64Z128rmk:
1810 Opc = X86::VPBLENDMQZ128rmk;
1811 break;
1812 case X86::VMOVDQA64Z256rmk:
1813 Opc = X86::VPBLENDMQZ256rmk;
1814 break;
1815 case X86::VMOVDQA64Zrmk:
1816 Opc = X86::VPBLENDMQZrmk;
1817 break;
1818 case X86::VMOVAPDZ128rmk:
1819 Opc = X86::VBLENDMPDZ128rmk;
1820 break;
1821 case X86::VMOVAPDZ256rmk:
1822 Opc = X86::VBLENDMPDZ256rmk;
1823 break;
1824 case X86::VMOVAPDZrmk:
1825 Opc = X86::VBLENDMPDZrmk;
1826 break;
1827 case X86::VMOVAPSZ128rmk:
1828 Opc = X86::VBLENDMPSZ128rmk;
1829 break;
1830 case X86::VMOVAPSZ256rmk:
1831 Opc = X86::VBLENDMPSZ256rmk;
1832 break;
1833 case X86::VMOVAPSZrmk:
1834 Opc = X86::VBLENDMPSZrmk;
1835 break;
1836 case X86::VBROADCASTSDZ256rmk:
1837 Opc = X86::VBLENDMPDZ256rmbk;
1838 break;
1839 case X86::VBROADCASTSDZrmk:
1840 Opc = X86::VBLENDMPDZrmbk;
1841 break;
1842 case X86::VBROADCASTSSZ128rmk:
1843 Opc = X86::VBLENDMPSZ128rmbk;
1844 break;
1845 case X86::VBROADCASTSSZ256rmk:
1846 Opc = X86::VBLENDMPSZ256rmbk;
1847 break;
1848 case X86::VBROADCASTSSZrmk:
1849 Opc = X86::VBLENDMPSZrmbk;
1850 break;
1851 case X86::VPBROADCASTDZ128rmk:
1852 Opc = X86::VPBLENDMDZ128rmbk;
1853 break;
1854 case X86::VPBROADCASTDZ256rmk:
1855 Opc = X86::VPBLENDMDZ256rmbk;
1856 break;
1857 case X86::VPBROADCASTDZrmk:
1858 Opc = X86::VPBLENDMDZrmbk;
1859 break;
1860 case X86::VPBROADCASTQZ128rmk:
1861 Opc = X86::VPBLENDMQZ128rmbk;
1862 break;
1863 case X86::VPBROADCASTQZ256rmk:
1864 Opc = X86::VPBLENDMQZ256rmbk;
1865 break;
1866 case X86::VPBROADCASTQZrmk:
1867 Opc = X86::VPBLENDMQZrmbk;
1868 break;
1869 }
1870
1871 NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1872 .add(Dest)
1873 .add(MI.getOperand(2))
1874 .add(Src)
1875 .add(MI.getOperand(3))
1876 .add(MI.getOperand(4))
1877 .add(MI.getOperand(5))
1878 .add(MI.getOperand(6))
1879 .add(MI.getOperand(7));
1880 NumRegOperands = 4;
1881 break;
1882 }
1883
1884 case X86::VMOVDQU8Z128rrk:
1885 case X86::VMOVDQU8Z256rrk:
1886 case X86::VMOVDQU8Zrrk:
1887 case X86::VMOVDQU16Z128rrk:
1888 case X86::VMOVDQU16Z256rrk:
1889 case X86::VMOVDQU16Zrrk:
1890 case X86::VMOVDQU32Z128rrk:
1891 case X86::VMOVDQA32Z128rrk:
1892 case X86::VMOVDQU32Z256rrk:
1893 case X86::VMOVDQA32Z256rrk:
1894 case X86::VMOVDQU32Zrrk:
1895 case X86::VMOVDQA32Zrrk:
1896 case X86::VMOVDQU64Z128rrk:
1897 case X86::VMOVDQA64Z128rrk:
1898 case X86::VMOVDQU64Z256rrk:
1899 case X86::VMOVDQA64Z256rrk:
1900 case X86::VMOVDQU64Zrrk:
1901 case X86::VMOVDQA64Zrrk:
1902 case X86::VMOVUPDZ128rrk:
1903 case X86::VMOVAPDZ128rrk:
1904 case X86::VMOVUPDZ256rrk:
1905 case X86::VMOVAPDZ256rrk:
1906 case X86::VMOVUPDZrrk:
1907 case X86::VMOVAPDZrrk:
1908 case X86::VMOVUPSZ128rrk:
1909 case X86::VMOVAPSZ128rrk:
1910 case X86::VMOVUPSZ256rrk:
1911 case X86::VMOVAPSZ256rrk:
1912 case X86::VMOVUPSZrrk:
1913 case X86::VMOVAPSZrrk: {
1914 unsigned Opc;
1915 switch (MIOpc) {
1916 default:
1917 llvm_unreachable("Unreachable!");
1918 case X86::VMOVDQU8Z128rrk:
1919 Opc = X86::VPBLENDMBZ128rrk;
1920 break;
1921 case X86::VMOVDQU8Z256rrk:
1922 Opc = X86::VPBLENDMBZ256rrk;
1923 break;
1924 case X86::VMOVDQU8Zrrk:
1925 Opc = X86::VPBLENDMBZrrk;
1926 break;
1927 case X86::VMOVDQU16Z128rrk:
1928 Opc = X86::VPBLENDMWZ128rrk;
1929 break;
1930 case X86::VMOVDQU16Z256rrk:
1931 Opc = X86::VPBLENDMWZ256rrk;
1932 break;
1933 case X86::VMOVDQU16Zrrk:
1934 Opc = X86::VPBLENDMWZrrk;
1935 break;
1936 case X86::VMOVDQU32Z128rrk:
1937 Opc = X86::VPBLENDMDZ128rrk;
1938 break;
1939 case X86::VMOVDQU32Z256rrk:
1940 Opc = X86::VPBLENDMDZ256rrk;
1941 break;
1942 case X86::VMOVDQU32Zrrk:
1943 Opc = X86::VPBLENDMDZrrk;
1944 break;
1945 case X86::VMOVDQU64Z128rrk:
1946 Opc = X86::VPBLENDMQZ128rrk;
1947 break;
1948 case X86::VMOVDQU64Z256rrk:
1949 Opc = X86::VPBLENDMQZ256rrk;
1950 break;
1951 case X86::VMOVDQU64Zrrk:
1952 Opc = X86::VPBLENDMQZrrk;
1953 break;
1954 case X86::VMOVUPDZ128rrk:
1955 Opc = X86::VBLENDMPDZ128rrk;
1956 break;
1957 case X86::VMOVUPDZ256rrk:
1958 Opc = X86::VBLENDMPDZ256rrk;
1959 break;
1960 case X86::VMOVUPDZrrk:
1961 Opc = X86::VBLENDMPDZrrk;
1962 break;
1963 case X86::VMOVUPSZ128rrk:
1964 Opc = X86::VBLENDMPSZ128rrk;
1965 break;
1966 case X86::VMOVUPSZ256rrk:
1967 Opc = X86::VBLENDMPSZ256rrk;
1968 break;
1969 case X86::VMOVUPSZrrk:
1970 Opc = X86::VBLENDMPSZrrk;
1971 break;
1972 case X86::VMOVDQA32Z128rrk:
1973 Opc = X86::VPBLENDMDZ128rrk;
1974 break;
1975 case X86::VMOVDQA32Z256rrk:
1976 Opc = X86::VPBLENDMDZ256rrk;
1977 break;
1978 case X86::VMOVDQA32Zrrk:
1979 Opc = X86::VPBLENDMDZrrk;
1980 break;
1981 case X86::VMOVDQA64Z128rrk:
1982 Opc = X86::VPBLENDMQZ128rrk;
1983 break;
1984 case X86::VMOVDQA64Z256rrk:
1985 Opc = X86::VPBLENDMQZ256rrk;
1986 break;
1987 case X86::VMOVDQA64Zrrk:
1988 Opc = X86::VPBLENDMQZrrk;
1989 break;
1990 case X86::VMOVAPDZ128rrk:
1991 Opc = X86::VBLENDMPDZ128rrk;
1992 break;
1993 case X86::VMOVAPDZ256rrk:
1994 Opc = X86::VBLENDMPDZ256rrk;
1995 break;
1996 case X86::VMOVAPDZrrk:
1997 Opc = X86::VBLENDMPDZrrk;
1998 break;
1999 case X86::VMOVAPSZ128rrk:
2000 Opc = X86::VBLENDMPSZ128rrk;
2001 break;
2002 case X86::VMOVAPSZ256rrk:
2003 Opc = X86::VBLENDMPSZ256rrk;
2004 break;
2005 case X86::VMOVAPSZrrk:
2006 Opc = X86::VBLENDMPSZrrk;
2007 break;
2008 }
2009
2010 NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
2011 .add(Dest)
2012 .add(MI.getOperand(2))
2013 .add(Src)
2014 .add(MI.getOperand(3));
2015 NumRegOperands = 4;
2016 break;
2017 }
2018 }
2019
2020 if (!NewMI)
2021 return nullptr;
2022
2023 if (LV) { // Update live variables
2024 for (unsigned I = 0; I < NumRegOperands; ++I) {
2025 MachineOperand &Op = MI.getOperand(I);
2026 if (Op.isReg() && (Op.isDead() || Op.isKill()))
2027 LV->replaceKillInstruction(Op.getReg(), MI, *NewMI);
2028 }
2029 }
2030
2031 MachineBasicBlock &MBB = *MI.getParent();
2032 MBB.insert(MI.getIterator(), NewMI); // Insert the new inst
2033
2034 if (LIS) {
2035 LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
2036 if (SrcReg)
2037 LIS->getInterval(SrcReg);
2038 if (SrcReg2)
2039 LIS->getInterval(SrcReg2);
2040 }
2041
2042 return NewMI;
2043}
2044
2045/// This determines which of three possible cases of a three source commute
2046/// the source indexes correspond to taking into account any mask operands.
2047/// None of the cases allows commuting a passthru operand. An index pair that
2048/// matches none of the cases is unreachable here.
2049/// Case 0 - Possible to commute the first and second operands.
2050/// Case 1 - Possible to commute the first and third operands.
2051/// Case 2 - Possible to commute the second and third operands.
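/// (For k-masked instructions the k-mask is operand 2, so the three sources
/// sit at operand indexes 1, 3 and 4 rather than 1, 2 and 3.)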
2052static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1,
2053 unsigned SrcOpIdx2) {
2054 // Put the lowest index to SrcOpIdx1 to simplify the checks below.
2055 if (SrcOpIdx1 > SrcOpIdx2)
2056 std::swap(SrcOpIdx1, SrcOpIdx2);
2057
2058 unsigned Op1 = 1, Op2 = 2, Op3 = 3;
2059 if (X86II::isKMasked(TSFlags)) {
2060 Op2++;
2061 Op3++;
2062 }
2063
2064 if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op2)
2065 return 0;
2066 if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op3)
2067 return 1;
2068 if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3)
2069 return 2;
2070 llvm_unreachable("Unknown three src commute case.");
2071}
2072
2073unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
2074    const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2,
2075 const X86InstrFMA3Group &FMA3Group) const {
2076
2077 unsigned Opc = MI.getOpcode();
2078
2079 // TODO: Commuting the 1st operand of FMA*_Int requires some additional
2080 // analysis. The commute optimization is legal only if all users of FMA*_Int
2081  // use only the lowest element of the FMA*_Int instruction. Such analysis is
2082  // not implemented yet, so commuting operand 1 is not supported here (see the
2083  // assertion below). When such analysis becomes available, this will be the
2084  // right place to call it.
2085 assert(!(FMA3Group.isIntrinsic() && (SrcOpIdx1 == 1 || SrcOpIdx2 == 1)) &&
2086 "Intrinsic instructions can't commute operand 1");
2087
2088 // Determine which case this commute is or if it can't be done.
2089 unsigned Case =
2090 getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, SrcOpIdx2);
2091 assert(Case < 3 && "Unexpected case number!");
2092
2093 // Define the FMA forms mapping array that helps to map input FMA form
2094 // to output FMA form to preserve the operation semantics after
2095 // commuting the operands.
2096 const unsigned Form132Index = 0;
2097 const unsigned Form213Index = 1;
2098 const unsigned Form231Index = 2;
2099 static const unsigned FormMapping[][3] = {
2100 // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
2101 // FMA132 A, C, b; ==> FMA231 C, A, b;
2102 // FMA213 B, A, c; ==> FMA213 A, B, c;
2103 // FMA231 C, A, b; ==> FMA132 A, C, b;
2104 {Form231Index, Form213Index, Form132Index},
2105 // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
2106 // FMA132 A, c, B; ==> FMA132 B, c, A;
2107 // FMA213 B, a, C; ==> FMA231 C, a, B;
2108 // FMA231 C, a, B; ==> FMA213 B, a, C;
2109 {Form132Index, Form231Index, Form213Index},
2110 // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
2111 // FMA132 a, C, B; ==> FMA213 a, B, C;
2112 // FMA213 b, A, C; ==> FMA132 b, C, A;
2113 // FMA231 c, A, B; ==> FMA231 c, B, A;
2114 {Form213Index, Form132Index, Form231Index}};
2115
2116 unsigned FMAForms[3];
2117 FMAForms[0] = FMA3Group.get132Opcode();
2118 FMAForms[1] = FMA3Group.get213Opcode();
2119 FMAForms[2] = FMA3Group.get231Opcode();
2120
2121 // Everything is ready, just adjust the FMA opcode and return it.
2122 for (unsigned FormIndex = 0; FormIndex < 3; FormIndex++)
2123 if (Opc == FMAForms[FormIndex])
2124 return FMAForms[FormMapping[Case][FormIndex]];
2125
2126 llvm_unreachable("Illegal FMA3 format");
2127}
2128
2129static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
2130 unsigned SrcOpIdx2) {
2131 // Determine which case this commute is or if it can't be done.
2132 unsigned Case =
2133 getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, SrcOpIdx2);
2134 assert(Case < 3 && "Unexpected case value!");
2135
2136 // For each case we need to swap two pairs of bits in the final immediate.
2137 static const uint8_t SwapMasks[3][4] = {
2138 {0x04, 0x10, 0x08, 0x20}, // Swap bits 2/4 and 3/5.
2139 {0x02, 0x10, 0x08, 0x40}, // Swap bits 1/4 and 3/6.
2140 {0x02, 0x04, 0x20, 0x40}, // Swap bits 1/2 and 5/6.
2141 };
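  // For example, commuting operands 1 and 2 (case 0) of a bitwise select with
  // Imm = 0xCA (dst = src1 ? src2 : src3) swaps bits 2/4 and 3/5, giving
  // Imm = 0xE2 (dst = src2 ? src1 : src3).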
2142
2143 uint8_t Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
2144 // Clear out the bits we are swapping.
2145 uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
2146 SwapMasks[Case][2] | SwapMasks[Case][3]);
2147 // If the immediate had a bit of the pair set, then set the opposite bit.
2148 if (Imm & SwapMasks[Case][0])
2149 NewImm |= SwapMasks[Case][1];
2150 if (Imm & SwapMasks[Case][1])
2151 NewImm |= SwapMasks[Case][0];
2152 if (Imm & SwapMasks[Case][2])
2153 NewImm |= SwapMasks[Case][3];
2154 if (Imm & SwapMasks[Case][3])
2155 NewImm |= SwapMasks[Case][2];
2156 MI.getOperand(MI.getNumOperands() - 1).setImm(NewImm);
2157}
2158
2159// Returns true if this is a VPERMI2 or VPERMT2 instruction that can be
2160// commuted.
2161static bool isCommutableVPERMV3Instruction(unsigned Opcode) {
2162#define VPERM_CASES(Suffix) \
2163 case X86::VPERMI2##Suffix##Z128rr: \
2164 case X86::VPERMT2##Suffix##Z128rr: \
2165 case X86::VPERMI2##Suffix##Z256rr: \
2166 case X86::VPERMT2##Suffix##Z256rr: \
2167 case X86::VPERMI2##Suffix##Zrr: \
2168 case X86::VPERMT2##Suffix##Zrr: \
2169 case X86::VPERMI2##Suffix##Z128rm: \
2170 case X86::VPERMT2##Suffix##Z128rm: \
2171 case X86::VPERMI2##Suffix##Z256rm: \
2172 case X86::VPERMT2##Suffix##Z256rm: \
2173 case X86::VPERMI2##Suffix##Zrm: \
2174 case X86::VPERMT2##Suffix##Zrm: \
2175 case X86::VPERMI2##Suffix##Z128rrkz: \
2176 case X86::VPERMT2##Suffix##Z128rrkz: \
2177 case X86::VPERMI2##Suffix##Z256rrkz: \
2178 case X86::VPERMT2##Suffix##Z256rrkz: \
2179 case X86::VPERMI2##Suffix##Zrrkz: \
2180 case X86::VPERMT2##Suffix##Zrrkz: \
2181 case X86::VPERMI2##Suffix##Z128rmkz: \
2182 case X86::VPERMT2##Suffix##Z128rmkz: \
2183 case X86::VPERMI2##Suffix##Z256rmkz: \
2184 case X86::VPERMT2##Suffix##Z256rmkz: \
2185 case X86::VPERMI2##Suffix##Zrmkz: \
2186 case X86::VPERMT2##Suffix##Zrmkz:
2187
2188#define VPERM_CASES_BROADCAST(Suffix) \
2189 VPERM_CASES(Suffix) \
2190 case X86::VPERMI2##Suffix##Z128rmb: \
2191 case X86::VPERMT2##Suffix##Z128rmb: \
2192 case X86::VPERMI2##Suffix##Z256rmb: \
2193 case X86::VPERMT2##Suffix##Z256rmb: \
2194 case X86::VPERMI2##Suffix##Zrmb: \
2195 case X86::VPERMT2##Suffix##Zrmb: \
2196 case X86::VPERMI2##Suffix##Z128rmbkz: \
2197 case X86::VPERMT2##Suffix##Z128rmbkz: \
2198 case X86::VPERMI2##Suffix##Z256rmbkz: \
2199 case X86::VPERMT2##Suffix##Z256rmbkz: \
2200 case X86::VPERMI2##Suffix##Zrmbkz: \
2201 case X86::VPERMT2##Suffix##Zrmbkz:
2202
2203 switch (Opcode) {
2204 default:
2205 return false;
2206  VPERM_CASES(B)
2207  VPERM_CASES_BROADCAST(D)
2208  VPERM_CASES_BROADCAST(PD)
2209  VPERM_CASES_BROADCAST(PS)
2210  VPERM_CASES_BROADCAST(Q)
2211  VPERM_CASES(W)
2212 return true;
2213 }
2214#undef VPERM_CASES_BROADCAST
2215#undef VPERM_CASES
2216}
2217
2218// Returns commuted opcode for VPERMI2 and VPERMT2 instructions by switching
2219// from the I opcode to the T opcode and vice versa.
2220static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
2221#define VPERM_CASES(Orig, New) \
2222 case X86::Orig##Z128rr: \
2223 return X86::New##Z128rr; \
2224 case X86::Orig##Z128rrkz: \
2225 return X86::New##Z128rrkz; \
2226 case X86::Orig##Z128rm: \
2227 return X86::New##Z128rm; \
2228 case X86::Orig##Z128rmkz: \
2229 return X86::New##Z128rmkz; \
2230 case X86::Orig##Z256rr: \
2231 return X86::New##Z256rr; \
2232 case X86::Orig##Z256rrkz: \
2233 return X86::New##Z256rrkz; \
2234 case X86::Orig##Z256rm: \
2235 return X86::New##Z256rm; \
2236 case X86::Orig##Z256rmkz: \
2237 return X86::New##Z256rmkz; \
2238 case X86::Orig##Zrr: \
2239 return X86::New##Zrr; \
2240 case X86::Orig##Zrrkz: \
2241 return X86::New##Zrrkz; \
2242 case X86::Orig##Zrm: \
2243 return X86::New##Zrm; \
2244 case X86::Orig##Zrmkz: \
2245 return X86::New##Zrmkz;
2246
2247#define VPERM_CASES_BROADCAST(Orig, New) \
2248 VPERM_CASES(Orig, New) \
2249 case X86::Orig##Z128rmb: \
2250 return X86::New##Z128rmb; \
2251 case X86::Orig##Z128rmbkz: \
2252 return X86::New##Z128rmbkz; \
2253 case X86::Orig##Z256rmb: \
2254 return X86::New##Z256rmb; \
2255 case X86::Orig##Z256rmbkz: \
2256 return X86::New##Z256rmbkz; \
2257 case X86::Orig##Zrmb: \
2258 return X86::New##Zrmb; \
2259 case X86::Orig##Zrmbkz: \
2260 return X86::New##Zrmbkz;
2261
2262 switch (Opcode) {
2263 VPERM_CASES(VPERMI2B, VPERMT2B)
2264 VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D)
2265 VPERM_CASES_BROADCAST(VPERMI2PD, VPERMT2PD)
2266 VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS)
2267 VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q)
2268 VPERM_CASES(VPERMI2W, VPERMT2W)
2269 VPERM_CASES(VPERMT2B, VPERMI2B)
2270 VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D)
2271 VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD)
2272 VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS)
2273 VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q)
2274 VPERM_CASES(VPERMT2W, VPERMI2W)
2275 }
2276
2277 llvm_unreachable("Unreachable!");
2278#undef VPERM_CASES_BROADCAST
2279#undef VPERM_CASES
2280}
2281
2282MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2283                                                   unsigned OpIdx1,
2284 unsigned OpIdx2) const {
2285 auto CloneIfNew = [&](MachineInstr &MI) {
2286 return std::exchange(NewMI, false)
2287 ? MI.getParent()->getParent()->CloneMachineInstr(&MI)
2288 : &MI;
2289 };
2290 MachineInstr *WorkingMI = nullptr;
2291 unsigned Opc = MI.getOpcode();
2292
2293#define CASE_ND(OP) \
2294 case X86::OP: \
2295 case X86::OP##_ND:
2296
2297 switch (Opc) {
2298 // SHLD B, C, I <-> SHRD C, B, (BitWidth - I)
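  // For example, SHLD32rri8 a, b, 8 and SHRD32rri8 b, a, 24 both compute
  // (a << 8) | (b >> 24).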
2299 CASE_ND(SHRD16rri8)
2300 CASE_ND(SHLD16rri8)
2301 CASE_ND(SHRD32rri8)
2302 CASE_ND(SHLD32rri8)
2303 CASE_ND(SHRD64rri8)
2304 CASE_ND(SHLD64rri8) {
2305 unsigned Size;
2306 switch (Opc) {
2307 default:
2308 llvm_unreachable("Unreachable!");
2309#define FROM_TO_SIZE(A, B, S) \
2310 case X86::A: \
2311 Opc = X86::B; \
2312 Size = S; \
2313 break; \
2314 case X86::A##_ND: \
2315 Opc = X86::B##_ND; \
2316 Size = S; \
2317 break; \
2318 case X86::B: \
2319 Opc = X86::A; \
2320 Size = S; \
2321 break; \
2322 case X86::B##_ND: \
2323 Opc = X86::A##_ND; \
2324 Size = S; \
2325 break;
2326
2327 FROM_TO_SIZE(SHRD16rri8, SHLD16rri8, 16)
2328 FROM_TO_SIZE(SHRD32rri8, SHLD32rri8, 32)
2329 FROM_TO_SIZE(SHRD64rri8, SHLD64rri8, 64)
2330#undef FROM_TO_SIZE
2331 }
2332 WorkingMI = CloneIfNew(MI);
2333 WorkingMI->setDesc(get(Opc));
2334 WorkingMI->getOperand(3).setImm(Size - MI.getOperand(3).getImm());
2335 break;
2336 }
2337 case X86::PFSUBrr:
2338 case X86::PFSUBRrr:
2339 // PFSUB x, y: x = x - y
2340 // PFSUBR x, y: x = y - x
2341 WorkingMI = CloneIfNew(MI);
2342 WorkingMI->setDesc(
2343 get(X86::PFSUBRrr == Opc ? X86::PFSUBrr : X86::PFSUBRrr));
2344 break;
2345 case X86::BLENDPDrri:
2346 case X86::BLENDPSrri:
2347 case X86::VBLENDPDrri:
2348 case X86::VBLENDPSrri:
2349 // If we're optimizing for size, try to use MOVSD/MOVSS.
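    // For example, BLENDPD dst, s1, s2, 0b10 computes {s1[0], s2[1]}, which is
    // exactly MOVSD dst, s2, s1 once the two sources are swapped below.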
2350 if (MI.getParent()->getParent()->getFunction().hasOptSize()) {
2351 unsigned Mask = (Opc == X86::BLENDPDrri || Opc == X86::VBLENDPDrri) ? 0x03: 0x0F;
2352 if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
2353#define FROM_TO(FROM, TO) \
2354 case X86::FROM: \
2355 Opc = X86::TO; \
2356 break;
2357 switch (Opc) {
2358 default:
2359 llvm_unreachable("Unreachable!");
2360 FROM_TO(BLENDPDrri, MOVSDrr)
2361 FROM_TO(BLENDPSrri, MOVSSrr)
2362 FROM_TO(VBLENDPDrri, VMOVSDrr)
2363 FROM_TO(VBLENDPSrri, VMOVSSrr)
2364 }
2365 WorkingMI = CloneIfNew(MI);
2366 WorkingMI->setDesc(get(Opc));
2367 WorkingMI->removeOperand(3);
2368 break;
2369 }
2370#undef FROM_TO
2371 }
2372 [[fallthrough]];
2373 case X86::PBLENDWrri:
2374 case X86::VBLENDPDYrri:
2375 case X86::VBLENDPSYrri:
2376 case X86::VPBLENDDrri:
2377 case X86::VPBLENDWrri:
2378 case X86::VPBLENDDYrri:
2379 case X86::VPBLENDWYrri: {
2380 int8_t Mask;
2381 switch (Opc) {
2382 default:
2383 llvm_unreachable("Unreachable!");
2384 case X86::BLENDPDrri:
2385 Mask = (int8_t)0x03;
2386 break;
2387 case X86::BLENDPSrri:
2388 Mask = (int8_t)0x0F;
2389 break;
2390 case X86::PBLENDWrri:
2391 Mask = (int8_t)0xFF;
2392 break;
2393 case X86::VBLENDPDrri:
2394 Mask = (int8_t)0x03;
2395 break;
2396 case X86::VBLENDPSrri:
2397 Mask = (int8_t)0x0F;
2398 break;
2399 case X86::VBLENDPDYrri:
2400 Mask = (int8_t)0x0F;
2401 break;
2402 case X86::VBLENDPSYrri:
2403 Mask = (int8_t)0xFF;
2404 break;
2405 case X86::VPBLENDDrri:
2406 Mask = (int8_t)0x0F;
2407 break;
2408 case X86::VPBLENDWrri:
2409 Mask = (int8_t)0xFF;
2410 break;
2411 case X86::VPBLENDDYrri:
2412 Mask = (int8_t)0xFF;
2413 break;
2414 case X86::VPBLENDWYrri:
2415 Mask = (int8_t)0xFF;
2416 break;
2417 }
2418 // Only the least significant bits of Imm are used.
2419 // Using int8_t to ensure it will be sign extended to the int64_t that
2420 // setImm takes in order to match isel behavior.
2421 int8_t Imm = MI.getOperand(3).getImm() & Mask;
2422 WorkingMI = CloneIfNew(MI);
2423 WorkingMI->getOperand(3).setImm(Mask ^ Imm);
2424 break;
2425 }
2426 case X86::INSERTPSrri:
2427 case X86::VINSERTPSrri:
2428 case X86::VINSERTPSZrri: {
2429 unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
2430 unsigned ZMask = Imm & 15;
2431 unsigned DstIdx = (Imm >> 4) & 3;
2432 unsigned SrcIdx = (Imm >> 6) & 3;
2433
2434 // We can commute insertps if we zero 2 of the elements, the insertion is
2435 // "inline" and we don't override the insertion with a zero.
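    // For example, Imm = 0x06 (DstIdx = SrcIdx = 0, ZMask = 0b0110) becomes
    // Imm = 0xF6 (DstIdx = SrcIdx = 3, same ZMask) once the sources swap;
    // both forms produce {src2[0], 0, 0, src1[3]}.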
2436 if (DstIdx == SrcIdx && (ZMask & (1 << DstIdx)) == 0 &&
2437 llvm::popcount(ZMask) == 2) {
2438 unsigned AltIdx = llvm::countr_zero((ZMask | (1 << DstIdx)) ^ 15);
2439 assert(AltIdx < 4 && "Illegal insertion index");
2440 unsigned AltImm = (AltIdx << 6) | (AltIdx << 4) | ZMask;
2441 WorkingMI = CloneIfNew(MI);
2442 WorkingMI->getOperand(MI.getNumOperands() - 1).setImm(AltImm);
2443 break;
2444 }
2445 return nullptr;
2446 }
2447 case X86::MOVSDrr:
2448 case X86::MOVSSrr:
2449 case X86::VMOVSDrr:
2450 case X86::VMOVSSrr: {
2451 // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
2452 if (Subtarget.hasSSE41()) {
2453 unsigned Mask;
2454 switch (Opc) {
2455 default:
2456 llvm_unreachable("Unreachable!");
2457 case X86::MOVSDrr:
2458 Opc = X86::BLENDPDrri;
2459 Mask = 0x02;
2460 break;
2461 case X86::MOVSSrr:
2462 Opc = X86::BLENDPSrri;
2463 Mask = 0x0E;
2464 break;
2465 case X86::VMOVSDrr:
2466 Opc = X86::VBLENDPDrri;
2467 Mask = 0x02;
2468 break;
2469 case X86::VMOVSSrr:
2470 Opc = X86::VBLENDPSrri;
2471 Mask = 0x0E;
2472 break;
2473 }
2474
2475 WorkingMI = CloneIfNew(MI);
2476 WorkingMI->setDesc(get(Opc));
2477 WorkingMI->addOperand(MachineOperand::CreateImm(Mask));
2478 break;
2479 }
2480
2481 WorkingMI = CloneIfNew(MI);
2482 WorkingMI->setDesc(get(X86::SHUFPDrri));
2483 WorkingMI->addOperand(MachineOperand::CreateImm(0x02));
2484 break;
2485 }
2486 case X86::SHUFPDrri: {
2487 // Commute to MOVSD.
2488 assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
2489 WorkingMI = CloneIfNew(MI);
2490 WorkingMI->setDesc(get(X86::MOVSDrr));
2491 WorkingMI->removeOperand(3);
2492 break;
2493 }
2494 case X86::PCLMULQDQrri:
2495 case X86::VPCLMULQDQrri:
2496 case X86::VPCLMULQDQYrri:
2497 case X86::VPCLMULQDQZrri:
2498 case X86::VPCLMULQDQZ128rri:
2499 case X86::VPCLMULQDQZ256rri: {
2500 // SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0]
2501 // SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0]
2502 unsigned Imm = MI.getOperand(3).getImm();
2503 unsigned Src1Hi = Imm & 0x01;
2504 unsigned Src2Hi = Imm & 0x10;
2505 WorkingMI = CloneIfNew(MI);
2506 WorkingMI->getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
2507 break;
2508 }
2509 case X86::VPCMPBZ128rri:
2510 case X86::VPCMPUBZ128rri:
2511 case X86::VPCMPBZ256rri:
2512 case X86::VPCMPUBZ256rri:
2513 case X86::VPCMPBZrri:
2514 case X86::VPCMPUBZrri:
2515 case X86::VPCMPDZ128rri:
2516 case X86::VPCMPUDZ128rri:
2517 case X86::VPCMPDZ256rri:
2518 case X86::VPCMPUDZ256rri:
2519 case X86::VPCMPDZrri:
2520 case X86::VPCMPUDZrri:
2521 case X86::VPCMPQZ128rri:
2522 case X86::VPCMPUQZ128rri:
2523 case X86::VPCMPQZ256rri:
2524 case X86::VPCMPUQZ256rri:
2525 case X86::VPCMPQZrri:
2526 case X86::VPCMPUQZrri:
2527 case X86::VPCMPWZ128rri:
2528 case X86::VPCMPUWZ128rri:
2529 case X86::VPCMPWZ256rri:
2530 case X86::VPCMPUWZ256rri:
2531 case X86::VPCMPWZrri:
2532 case X86::VPCMPUWZrri:
2533 case X86::VPCMPBZ128rrik:
2534 case X86::VPCMPUBZ128rrik:
2535 case X86::VPCMPBZ256rrik:
2536 case X86::VPCMPUBZ256rrik:
2537 case X86::VPCMPBZrrik:
2538 case X86::VPCMPUBZrrik:
2539 case X86::VPCMPDZ128rrik:
2540 case X86::VPCMPUDZ128rrik:
2541 case X86::VPCMPDZ256rrik:
2542 case X86::VPCMPUDZ256rrik:
2543 case X86::VPCMPDZrrik:
2544 case X86::VPCMPUDZrrik:
2545 case X86::VPCMPQZ128rrik:
2546 case X86::VPCMPUQZ128rrik:
2547 case X86::VPCMPQZ256rrik:
2548 case X86::VPCMPUQZ256rrik:
2549 case X86::VPCMPQZrrik:
2550 case X86::VPCMPUQZrrik:
2551 case X86::VPCMPWZ128rrik:
2552 case X86::VPCMPUWZ128rrik:
2553 case X86::VPCMPWZ256rrik:
2554 case X86::VPCMPUWZ256rrik:
2555 case X86::VPCMPWZrrik:
2556 case X86::VPCMPUWZrrik:
2557 WorkingMI = CloneIfNew(MI);
2558 // Flip comparison mode immediate (if necessary).
2559    WorkingMI->getOperand(MI.getNumOperands() - 1)
2560        .setImm(X86::getSwappedVPCMPImm(
2561            MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7));
2562 break;
2563 case X86::VPCOMBri:
2564 case X86::VPCOMUBri:
2565 case X86::VPCOMDri:
2566 case X86::VPCOMUDri:
2567 case X86::VPCOMQri:
2568 case X86::VPCOMUQri:
2569 case X86::VPCOMWri:
2570 case X86::VPCOMUWri:
2571 WorkingMI = CloneIfNew(MI);
2572 // Flip comparison mode immediate (if necessary).
2573 WorkingMI->getOperand(3).setImm(
2574 X86::getSwappedVPCOMImm(MI.getOperand(3).getImm() & 0x7));
2575 break;
2576 case X86::VCMPSDZrri:
2577 case X86::VCMPSSZrri:
2578 case X86::VCMPPDZrri:
2579 case X86::VCMPPSZrri:
2580 case X86::VCMPSHZrri:
2581 case X86::VCMPPHZrri:
2582 case X86::VCMPPHZ128rri:
2583 case X86::VCMPPHZ256rri:
2584 case X86::VCMPPDZ128rri:
2585 case X86::VCMPPSZ128rri:
2586 case X86::VCMPPDZ256rri:
2587 case X86::VCMPPSZ256rri:
2588 case X86::VCMPPDZrrik:
2589 case X86::VCMPPSZrrik:
2590 case X86::VCMPPDZ128rrik:
2591 case X86::VCMPPSZ128rrik:
2592 case X86::VCMPPDZ256rrik:
2593 case X86::VCMPPSZ256rrik:
2594 WorkingMI = CloneIfNew(MI);
2595    WorkingMI->getOperand(MI.getNumExplicitOperands() - 1)
2596        .setImm(X86::getSwappedVCMPImm(
2597            MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 0x1f));
2598 break;
2599 case X86::VPERM2F128rri:
2600 case X86::VPERM2I128rri:
2601 // Flip permute source immediate.
2602 // Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
2603 // Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
2604 WorkingMI = CloneIfNew(MI);
2605 WorkingMI->getOperand(3).setImm((MI.getOperand(3).getImm() & 0xFF) ^ 0x22);
2606 break;
2607 case X86::MOVHLPSrr:
2608 case X86::UNPCKHPDrr:
2609 case X86::VMOVHLPSrr:
2610 case X86::VUNPCKHPDrr:
2611 case X86::VMOVHLPSZrr:
2612 case X86::VUNPCKHPDZ128rr:
2613 assert(Subtarget.hasSSE2() && "Commuting MOVHLP/UNPCKHPD requires SSE2!");
2614
2615 switch (Opc) {
2616 default:
2617 llvm_unreachable("Unreachable!");
2618 case X86::MOVHLPSrr:
2619 Opc = X86::UNPCKHPDrr;
2620 break;
2621 case X86::UNPCKHPDrr:
2622 Opc = X86::MOVHLPSrr;
2623 break;
2624 case X86::VMOVHLPSrr:
2625 Opc = X86::VUNPCKHPDrr;
2626 break;
2627 case X86::VUNPCKHPDrr:
2628 Opc = X86::VMOVHLPSrr;
2629 break;
2630 case X86::VMOVHLPSZrr:
2631 Opc = X86::VUNPCKHPDZ128rr;
2632 break;
2633 case X86::VUNPCKHPDZ128rr:
2634 Opc = X86::VMOVHLPSZrr;
2635 break;
2636 }
2637 WorkingMI = CloneIfNew(MI);
2638 WorkingMI->setDesc(get(Opc));
2639 break;
2640 CASE_ND(CMOV16rr)
2641 CASE_ND(CMOV32rr)
2642 CASE_ND(CMOV64rr) {
2643 WorkingMI = CloneIfNew(MI);
2644 unsigned OpNo = MI.getDesc().getNumOperands() - 1;
2645    X86::CondCode CC = static_cast<X86::CondCode>(MI.getOperand(OpNo).getImm());
2646    WorkingMI->getOperand(OpNo).setImm(X86::GetOppositeBranchCondition(CC));
2647    break;
2648 }
2649 case X86::VPTERNLOGDZrri:
2650 case X86::VPTERNLOGDZrmi:
2651 case X86::VPTERNLOGDZ128rri:
2652 case X86::VPTERNLOGDZ128rmi:
2653 case X86::VPTERNLOGDZ256rri:
2654 case X86::VPTERNLOGDZ256rmi:
2655 case X86::VPTERNLOGQZrri:
2656 case X86::VPTERNLOGQZrmi:
2657 case X86::VPTERNLOGQZ128rri:
2658 case X86::VPTERNLOGQZ128rmi:
2659 case X86::VPTERNLOGQZ256rri:
2660 case X86::VPTERNLOGQZ256rmi:
2661 case X86::VPTERNLOGDZrrik:
2662 case X86::VPTERNLOGDZ128rrik:
2663 case X86::VPTERNLOGDZ256rrik:
2664 case X86::VPTERNLOGQZrrik:
2665 case X86::VPTERNLOGQZ128rrik:
2666 case X86::VPTERNLOGQZ256rrik:
2667 case X86::VPTERNLOGDZrrikz:
2668 case X86::VPTERNLOGDZrmikz:
2669 case X86::VPTERNLOGDZ128rrikz:
2670 case X86::VPTERNLOGDZ128rmikz:
2671 case X86::VPTERNLOGDZ256rrikz:
2672 case X86::VPTERNLOGDZ256rmikz:
2673 case X86::VPTERNLOGQZrrikz:
2674 case X86::VPTERNLOGQZrmikz:
2675 case X86::VPTERNLOGQZ128rrikz:
2676 case X86::VPTERNLOGQZ128rmikz:
2677 case X86::VPTERNLOGQZ256rrikz:
2678 case X86::VPTERNLOGQZ256rmikz:
2679 case X86::VPTERNLOGDZ128rmbi:
2680 case X86::VPTERNLOGDZ256rmbi:
2681 case X86::VPTERNLOGDZrmbi:
2682 case X86::VPTERNLOGQZ128rmbi:
2683 case X86::VPTERNLOGQZ256rmbi:
2684 case X86::VPTERNLOGQZrmbi:
2685 case X86::VPTERNLOGDZ128rmbikz:
2686 case X86::VPTERNLOGDZ256rmbikz:
2687 case X86::VPTERNLOGDZrmbikz:
2688 case X86::VPTERNLOGQZ128rmbikz:
2689 case X86::VPTERNLOGQZ256rmbikz:
2690 case X86::VPTERNLOGQZrmbikz: {
2691 WorkingMI = CloneIfNew(MI);
2692 commuteVPTERNLOG(*WorkingMI, OpIdx1, OpIdx2);
2693 break;
2694 }
2695  default:
2696    if (isCommutableVPERMV3Instruction(Opc)) {
2697      WorkingMI = CloneIfNew(MI);
2698 WorkingMI->setDesc(get(getCommutedVPERMV3Opcode(Opc)));
2699 break;
2700 }
2701
2702 if (auto *FMA3Group = getFMA3Group(Opc, MI.getDesc().TSFlags)) {
2703 WorkingMI = CloneIfNew(MI);
2704 WorkingMI->setDesc(
2705 get(getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group)));
2706 break;
2707 }
2708 }
2709 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2710}
2711
2712bool X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
2713 unsigned &SrcOpIdx1,
2714 unsigned &SrcOpIdx2,
2715 bool IsIntrinsic) const {
2716 uint64_t TSFlags = MI.getDesc().TSFlags;
2717
2718 unsigned FirstCommutableVecOp = 1;
2719 unsigned LastCommutableVecOp = 3;
2720 unsigned KMaskOp = -1U;
2721 if (X86II::isKMasked(TSFlags)) {
2722    // For k-zero-masked operations it is OK to commute the first vector
2723    // operand, unless this is an intrinsic instruction.
2724 // For regular k-masked operations a conservative choice is done as the
2725 // elements of the first vector operand, for which the corresponding bit
2726 // in the k-mask operand is set to 0, are copied to the result of the
2727 // instruction.
2728 // TODO/FIXME: The commute still may be legal if it is known that the
2729 // k-mask operand is set to either all ones or all zeroes.
2730 // It is also Ok to commute the 1st operand if all users of MI use only
2731 // the elements enabled by the k-mask operand. For example,
2732 // v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i]
2733 // : v1[i];
2734 // VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 ->
2735 // // Ok, to commute v1 in FMADD213PSZrk.
2736
2737 // The k-mask operand has index = 2 for masked and zero-masked operations.
2738 KMaskOp = 2;
2739
2740 // The operand with index = 1 is used as a source for those elements for
2741 // which the corresponding bit in the k-mask is set to 0.
2742 if (X86II::isKMergeMasked(TSFlags) || IsIntrinsic)
2743 FirstCommutableVecOp = 3;
2744
2745 LastCommutableVecOp++;
2746 } else if (IsIntrinsic) {
2747 // Commuting the first operand of an intrinsic instruction isn't possible
2748 // unless we can prove that only the lowest element of the result is used.
2749 FirstCommutableVecOp = 2;
2750 }
2751
2752 if (isMem(MI, LastCommutableVecOp))
2753 LastCommutableVecOp--;
2754
2755  // Only operands in [FirstCommutableVecOp, LastCommutableVecOp] (excluding
2755  // the k-mask operand) are commutable.
2756 // Also, the value 'CommuteAnyOperandIndex' is valid here as it means
2757 // that the operand is not specified/fixed.
2758 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
2759 (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp ||
2760 SrcOpIdx1 == KMaskOp))
2761 return false;
2762 if (SrcOpIdx2 != CommuteAnyOperandIndex &&
2763 (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp ||
2764 SrcOpIdx2 == KMaskOp))
2765 return false;
2766
2767 // Look for two different register operands assumed to be commutable
2768 // regardless of the FMA opcode. The FMA opcode is adjusted later.
2769 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
2770 SrcOpIdx2 == CommuteAnyOperandIndex) {
2771 unsigned CommutableOpIdx2 = SrcOpIdx2;
2772
2773 // At least one of operands to be commuted is not specified and
2774 // this method is free to choose appropriate commutable operands.
2775 if (SrcOpIdx1 == SrcOpIdx2)
2776 // Both of operands are not fixed. By default set one of commutable
2777 // operands to the last register operand of the instruction.
2778 CommutableOpIdx2 = LastCommutableVecOp;
2779 else if (SrcOpIdx2 == CommuteAnyOperandIndex)
2780 // Only one of operands is not fixed.
2781 CommutableOpIdx2 = SrcOpIdx1;
2782
2783 // CommutableOpIdx2 is well defined now. Let's choose another commutable
2784 // operand and assign its index to CommutableOpIdx1.
2785 Register Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
2786
2787 unsigned CommutableOpIdx1;
2788 for (CommutableOpIdx1 = LastCommutableVecOp;
2789 CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
2790 // Just ignore and skip the k-mask operand.
2791 if (CommutableOpIdx1 == KMaskOp)
2792 continue;
2793
2794 // The commuted operands must have different registers.
2795 // Otherwise, the commute transformation does not change anything and
2796 // is useless then.
2797 if (Op2Reg != MI.getOperand(CommutableOpIdx1).getReg())
2798 break;
2799 }
2800
2801 // No appropriate commutable operands were found.
2802 if (CommutableOpIdx1 < FirstCommutableVecOp)
2803 return false;
2804
2805 // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpidx2
2806 // to return those values.
2807 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2808 CommutableOpIdx2))
2809 return false;
2810 }
2811
2812 return true;
2813}
2814
2815bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
2816                                         unsigned &SrcOpIdx1,
2817 unsigned &SrcOpIdx2) const {
2818 const MCInstrDesc &Desc = MI.getDesc();
2819 if (!Desc.isCommutable())
2820 return false;
2821
2822 switch (MI.getOpcode()) {
2823 case X86::CMPSDrri:
2824 case X86::CMPSSrri:
2825 case X86::CMPPDrri:
2826 case X86::CMPPSrri:
2827 case X86::VCMPSDrri:
2828 case X86::VCMPSSrri:
2829 case X86::VCMPPDrri:
2830 case X86::VCMPPSrri:
2831 case X86::VCMPPDYrri:
2832 case X86::VCMPPSYrri:
2833 case X86::VCMPSDZrri:
2834 case X86::VCMPSSZrri:
2835 case X86::VCMPPDZrri:
2836 case X86::VCMPPSZrri:
2837 case X86::VCMPSHZrri:
2838 case X86::VCMPPHZrri:
2839 case X86::VCMPPHZ128rri:
2840 case X86::VCMPPHZ256rri:
2841 case X86::VCMPPDZ128rri:
2842 case X86::VCMPPSZ128rri:
2843 case X86::VCMPPDZ256rri:
2844 case X86::VCMPPSZ256rri:
2845 case X86::VCMPPDZrrik:
2846 case X86::VCMPPSZrrik:
2847 case X86::VCMPPDZ128rrik:
2848 case X86::VCMPPSZ128rrik:
2849 case X86::VCMPPDZ256rrik:
2850 case X86::VCMPPSZ256rrik: {
2851 unsigned OpOffset = X86II::isKMasked(Desc.TSFlags) ? 1 : 0;
2852
2853 // Float comparison can be safely commuted for
2854 // Ordered/Unordered/Equal/NotEqual tests
2855 unsigned Imm = MI.getOperand(3 + OpOffset).getImm() & 0x7;
2856 switch (Imm) {
2857 default:
2858 // EVEX versions can be commuted.
2859 if ((Desc.TSFlags & X86II::EncodingMask) == X86II::EVEX)
2860 break;
2861 return false;
2862 case 0x00: // EQUAL
2863 case 0x03: // UNORDERED
2864 case 0x04: // NOT EQUAL
2865 case 0x07: // ORDERED
2866 break;
2867 }
2868
2869 // The indices of the commutable operands are 1 and 2 (or 2 and 3
2870 // when masked).
2871 // Assign them to the returned operand indices here.
2872 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1 + OpOffset,
2873 2 + OpOffset);
2874 }
2875 case X86::MOVSSrr:
2876 // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
2877 // form sse4.1 blend. We assume VMOVSSrr/VMOVSDrr is always commutable since
2878 // AVX implies sse4.1.
2879 if (Subtarget.hasSSE41())
2880 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2881 return false;
2882 case X86::SHUFPDrri:
2883 // We can commute this to MOVSD.
2884 if (MI.getOperand(3).getImm() == 0x02)
2885 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2886 return false;
2887 case X86::MOVHLPSrr:
2888 case X86::UNPCKHPDrr:
2889 case X86::VMOVHLPSrr:
2890 case X86::VUNPCKHPDrr:
2891 case X86::VMOVHLPSZrr:
2892 case X86::VUNPCKHPDZ128rr:
2893 if (Subtarget.hasSSE2())
2894 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2895 return false;
2896 case X86::VPTERNLOGDZrri:
2897 case X86::VPTERNLOGDZrmi:
2898 case X86::VPTERNLOGDZ128rri:
2899 case X86::VPTERNLOGDZ128rmi:
2900 case X86::VPTERNLOGDZ256rri:
2901 case X86::VPTERNLOGDZ256rmi:
2902 case X86::VPTERNLOGQZrri:
2903 case X86::VPTERNLOGQZrmi:
2904 case X86::VPTERNLOGQZ128rri:
2905 case X86::VPTERNLOGQZ128rmi:
2906 case X86::VPTERNLOGQZ256rri:
2907 case X86::VPTERNLOGQZ256rmi:
2908 case X86::VPTERNLOGDZrrik:
2909 case X86::VPTERNLOGDZ128rrik:
2910 case X86::VPTERNLOGDZ256rrik:
2911 case X86::VPTERNLOGQZrrik:
2912 case X86::VPTERNLOGQZ128rrik:
2913 case X86::VPTERNLOGQZ256rrik:
2914 case X86::VPTERNLOGDZrrikz:
2915 case X86::VPTERNLOGDZrmikz:
2916 case X86::VPTERNLOGDZ128rrikz:
2917 case X86::VPTERNLOGDZ128rmikz:
2918 case X86::VPTERNLOGDZ256rrikz:
2919 case X86::VPTERNLOGDZ256rmikz:
2920 case X86::VPTERNLOGQZrrikz:
2921 case X86::VPTERNLOGQZrmikz:
2922 case X86::VPTERNLOGQZ128rrikz:
2923 case X86::VPTERNLOGQZ128rmikz:
2924 case X86::VPTERNLOGQZ256rrikz:
2925 case X86::VPTERNLOGQZ256rmikz:
2926 case X86::VPTERNLOGDZ128rmbi:
2927 case X86::VPTERNLOGDZ256rmbi:
2928 case X86::VPTERNLOGDZrmbi:
2929 case X86::VPTERNLOGQZ128rmbi:
2930 case X86::VPTERNLOGQZ256rmbi:
2931 case X86::VPTERNLOGQZrmbi:
2932 case X86::VPTERNLOGDZ128rmbikz:
2933 case X86::VPTERNLOGDZ256rmbikz:
2934 case X86::VPTERNLOGDZrmbikz:
2935 case X86::VPTERNLOGQZ128rmbikz:
2936 case X86::VPTERNLOGQZ256rmbikz:
2937 case X86::VPTERNLOGQZrmbikz:
2938 return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2939 case X86::VPDPWSSDYrr:
2940 case X86::VPDPWSSDrr:
2941 case X86::VPDPWSSDSYrr:
2942 case X86::VPDPWSSDSrr:
2943 case X86::VPDPWUUDrr:
2944 case X86::VPDPWUUDYrr:
2945 case X86::VPDPWUUDSrr:
2946 case X86::VPDPWUUDSYrr:
2947 case X86::VPDPBSSDSrr:
2948 case X86::VPDPBSSDSYrr:
2949 case X86::VPDPBSSDrr:
2950 case X86::VPDPBSSDYrr:
2951 case X86::VPDPBUUDSrr:
2952 case X86::VPDPBUUDSYrr:
2953 case X86::VPDPBUUDrr:
2954 case X86::VPDPBUUDYrr:
2955 case X86::VPDPBSSDSZ128r:
2956 case X86::VPDPBSSDSZ128rk:
2957 case X86::VPDPBSSDSZ128rkz:
2958 case X86::VPDPBSSDSZ256r:
2959 case X86::VPDPBSSDSZ256rk:
2960 case X86::VPDPBSSDSZ256rkz:
2961 case X86::VPDPBSSDSZr:
2962 case X86::VPDPBSSDSZrk:
2963 case X86::VPDPBSSDSZrkz:
2964 case X86::VPDPBSSDZ128r:
2965 case X86::VPDPBSSDZ128rk:
2966 case X86::VPDPBSSDZ128rkz:
2967 case X86::VPDPBSSDZ256r:
2968 case X86::VPDPBSSDZ256rk:
2969 case X86::VPDPBSSDZ256rkz:
2970 case X86::VPDPBSSDZr:
2971 case X86::VPDPBSSDZrk:
2972 case X86::VPDPBSSDZrkz:
2973 case X86::VPDPBUUDSZ128r:
2974 case X86::VPDPBUUDSZ128rk:
2975 case X86::VPDPBUUDSZ128rkz:
2976 case X86::VPDPBUUDSZ256r:
2977 case X86::VPDPBUUDSZ256rk:
2978 case X86::VPDPBUUDSZ256rkz:
2979 case X86::VPDPBUUDSZr:
2980 case X86::VPDPBUUDSZrk:
2981 case X86::VPDPBUUDSZrkz:
2982 case X86::VPDPBUUDZ128r:
2983 case X86::VPDPBUUDZ128rk:
2984 case X86::VPDPBUUDZ128rkz:
2985 case X86::VPDPBUUDZ256r:
2986 case X86::VPDPBUUDZ256rk:
2987 case X86::VPDPBUUDZ256rkz:
2988 case X86::VPDPBUUDZr:
2989 case X86::VPDPBUUDZrk:
2990 case X86::VPDPBUUDZrkz:
2991 case X86::VPDPWSSDZ128r:
2992 case X86::VPDPWSSDZ128rk:
2993 case X86::VPDPWSSDZ128rkz:
2994 case X86::VPDPWSSDZ256r:
2995 case X86::VPDPWSSDZ256rk:
2996 case X86::VPDPWSSDZ256rkz:
2997 case X86::VPDPWSSDZr:
2998 case X86::VPDPWSSDZrk:
2999 case X86::VPDPWSSDZrkz:
3000 case X86::VPDPWSSDSZ128r:
3001 case X86::VPDPWSSDSZ128rk:
3002 case X86::VPDPWSSDSZ128rkz:
3003 case X86::VPDPWSSDSZ256r:
3004 case X86::VPDPWSSDSZ256rk:
3005 case X86::VPDPWSSDSZ256rkz:
3006 case X86::VPDPWSSDSZr:
3007 case X86::VPDPWSSDSZrk:
3008 case X86::VPDPWSSDSZrkz:
3009 case X86::VPDPWUUDZ128r:
3010 case X86::VPDPWUUDZ128rk:
3011 case X86::VPDPWUUDZ128rkz:
3012 case X86::VPDPWUUDZ256r:
3013 case X86::VPDPWUUDZ256rk:
3014 case X86::VPDPWUUDZ256rkz:
3015 case X86::VPDPWUUDZr:
3016 case X86::VPDPWUUDZrk:
3017 case X86::VPDPWUUDZrkz:
3018 case X86::VPDPWUUDSZ128r:
3019 case X86::VPDPWUUDSZ128rk:
3020 case X86::VPDPWUUDSZ128rkz:
3021 case X86::VPDPWUUDSZ256r:
3022 case X86::VPDPWUUDSZ256rk:
3023 case X86::VPDPWUUDSZ256rkz:
3024 case X86::VPDPWUUDSZr:
3025 case X86::VPDPWUUDSZrk:
3026 case X86::VPDPWUUDSZrkz:
3027 case X86::VPMADD52HUQrr:
3028 case X86::VPMADD52HUQYrr:
3029 case X86::VPMADD52HUQZ128r:
3030 case X86::VPMADD52HUQZ128rk:
3031 case X86::VPMADD52HUQZ128rkz:
3032 case X86::VPMADD52HUQZ256r:
3033 case X86::VPMADD52HUQZ256rk:
3034 case X86::VPMADD52HUQZ256rkz:
3035 case X86::VPMADD52HUQZr:
3036 case X86::VPMADD52HUQZrk:
3037 case X86::VPMADD52HUQZrkz:
3038 case X86::VPMADD52LUQrr:
3039 case X86::VPMADD52LUQYrr:
3040 case X86::VPMADD52LUQZ128r:
3041 case X86::VPMADD52LUQZ128rk:
3042 case X86::VPMADD52LUQZ128rkz:
3043 case X86::VPMADD52LUQZ256r:
3044 case X86::VPMADD52LUQZ256rk:
3045 case X86::VPMADD52LUQZ256rkz:
3046 case X86::VPMADD52LUQZr:
3047 case X86::VPMADD52LUQZrk:
3048 case X86::VPMADD52LUQZrkz:
3049 case X86::VFMADDCPHZr:
3050 case X86::VFMADDCPHZrk:
3051 case X86::VFMADDCPHZrkz:
3052 case X86::VFMADDCPHZ128r:
3053 case X86::VFMADDCPHZ128rk:
3054 case X86::VFMADDCPHZ128rkz:
3055 case X86::VFMADDCPHZ256r:
3056 case X86::VFMADDCPHZ256rk:
3057 case X86::VFMADDCPHZ256rkz:
3058 case X86::VFMADDCSHZr:
3059 case X86::VFMADDCSHZrk:
3060 case X86::VFMADDCSHZrkz: {
3061 unsigned CommutableOpIdx1 = 2;
3062 unsigned CommutableOpIdx2 = 3;
3063 if (X86II::isKMasked(Desc.TSFlags)) {
3064 // Skip the mask register.
3065 ++CommutableOpIdx1;
3066 ++CommutableOpIdx2;
3067 }
3068 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
3069 CommutableOpIdx2))
3070 return false;
3071 if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
3072 // No idea.
3073 return false;
3074 return true;
3075 }
3076
3077 default:
3078 const X86InstrFMA3Group *FMA3Group =
3079 getFMA3Group(MI.getOpcode(), MI.getDesc().TSFlags);
3080 if (FMA3Group)
3081 return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2,
3082 FMA3Group->isIntrinsic());
3083
3084    // Handle masked instructions since we need to skip over the mask input
3085 // and the preserved input.
3086 if (X86II::isKMasked(Desc.TSFlags)) {
3087 // First assume that the first input is the mask operand and skip past it.
3088 unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1;
3089 unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2;
3090 // Check if the first input is tied. If there isn't one then we only
3091 // need to skip the mask operand which we did above.
3092 if ((MI.getDesc().getOperandConstraint(Desc.getNumDefs(),
3093 MCOI::TIED_TO) != -1)) {
3094 // If this is zero masking instruction with a tied operand, we need to
3095 // move the first index back to the first input since this must
3096 // be a 3 input instruction and we want the first two non-mask inputs.
3097 // Otherwise this is a 2 input instruction with a preserved input and
3098 // mask, so we need to move the indices to skip one more input.
3099 if (X86II::isKMergeMasked(Desc.TSFlags)) {
3100 ++CommutableOpIdx1;
3101 ++CommutableOpIdx2;
3102 } else {
3103 --CommutableOpIdx1;
3104 }
3105 }
3106
3107 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
3108 CommutableOpIdx2))
3109 return false;
3110
3111 if (!MI.getOperand(SrcOpIdx1).isReg() ||
3112 !MI.getOperand(SrcOpIdx2).isReg())
3113 // No idea.
3114 return false;
3115 return true;
3116 }
3117
3118 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
3119 }
3120 return false;
3121}
3122
3123static bool isConvertibleLEA(MachineInstr *MI) {
3124  unsigned Opcode = MI->getOpcode();
3125 if (Opcode != X86::LEA32r && Opcode != X86::LEA64r &&
3126 Opcode != X86::LEA64_32r)
3127 return false;
3128
3129 const MachineOperand &Scale = MI->getOperand(1 + X86::AddrScaleAmt);
3130 const MachineOperand &Disp = MI->getOperand(1 + X86::AddrDisp);
3131 const MachineOperand &Segment = MI->getOperand(1 + X86::AddrSegmentReg);
3132
3133 if (Segment.getReg() != 0 || !Disp.isImm() || Disp.getImm() != 0 ||
3134 Scale.getImm() > 1)
3135 return false;
3136
3137 return true;
3138}
3139
3140bool X86InstrInfo::hasCommutePreference(MachineInstr &MI, bool &Commute) const {
3141  // Currently we are only interested in the following sequence:
3142  //   r3 = lea r1, r2
3143  //   r5 = add r3, r4
3144  // If both r3 and r4 are killed by the add, we would prefer the add to have
3145  // the operand order
3146  //   r5 = add r4, r3
3147  // so that X86FixupLEAs can later rewrite the lea as an add.
3148 unsigned Opcode = MI.getOpcode();
3149 if (Opcode != X86::ADD32rr && Opcode != X86::ADD64rr)
3150 return false;
3151
3152 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
3153 Register Reg1 = MI.getOperand(1).getReg();
3154 Register Reg2 = MI.getOperand(2).getReg();
3155
3156 // Check if Reg1 comes from LEA in the same MBB.
3157 if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg1)) {
3158 if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
3159 Commute = true;
3160 return true;
3161 }
3162 }
3163
3164 // Check if Reg2 comes from LEA in the same MBB.
3165 if (MachineInstr *Inst = MRI.getUniqueVRegDef(Reg2)) {
3166 if (isConvertibleLEA(Inst) && Inst->getParent() == MI.getParent()) {
3167 Commute = false;
3168 return true;
3169 }
3170 }
3171
3172 return false;
3173}
3174
3175int X86::getCondSrcNoFromDesc(const MCInstrDesc &MCID) {
3176  unsigned Opcode = MCID.getOpcode();
3177 if (!(X86::isJCC(Opcode) || X86::isSETCC(Opcode) || X86::isSETZUCC(Opcode) ||
3178 X86::isCMOVCC(Opcode) || X86::isCFCMOVCC(Opcode) ||
3179 X86::isCCMPCC(Opcode) || X86::isCTESTCC(Opcode)))
3180 return -1;
3181 // Assume that condition code is always the last use operand.
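  // e.g. CMOV32rr (dst; src1, src2, cond) has one def and three uses, so the
  // condition code is use operand 2.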
3182 unsigned NumUses = MCID.getNumOperands() - MCID.getNumDefs();
3183 return NumUses - 1;
3184}
3185
3186X86::CondCode X86::getCondFromMI(const MachineInstr &MI) {
3187  const MCInstrDesc &MCID = MI.getDesc();
3188 int CondNo = getCondSrcNoFromDesc(MCID);
3189 if (CondNo < 0)
3190 return X86::COND_INVALID;
3191 CondNo += MCID.getNumDefs();
3192 return static_cast<X86::CondCode>(MI.getOperand(CondNo).getImm());
3193}
3194
3195X86::CondCode X86::getCondFromBranch(const MachineInstr &MI) {
3196  return X86::isJCC(MI.getOpcode()) ? X86::getCondFromMI(MI)
3197                                    : X86::COND_INVALID;
3198}
3199
3200X86::CondCode X86::getCondFromSETCC(const MachineInstr &MI) {
3201  return X86::isSETCC(MI.getOpcode()) || X86::isSETZUCC(MI.getOpcode())
3202             ? X86::getCondFromMI(MI)
3203             : X86::COND_INVALID;
3204}
3205
3206X86::CondCode X86::getCondFromCMov(const MachineInstr &MI) {
3207  return X86::isCMOVCC(MI.getOpcode()) ? X86::getCondFromMI(MI)
3208                                       : X86::COND_INVALID;
3209}
3210
3211X86::CondCode X86::getCondFromCFCMov(const MachineInstr &MI) {
3212  return X86::isCFCMOVCC(MI.getOpcode()) ? X86::getCondFromMI(MI)
3213                                         : X86::COND_INVALID;
3214}
3215
3216X86::CondCode X86::getCondFromCCMP(const MachineInstr &MI) {
3217  return X86::isCCMPCC(MI.getOpcode()) || X86::isCTESTCC(MI.getOpcode())
3218             ? X86::getCondFromMI(MI)
3219             : X86::COND_INVALID;
3220}
3221
3222int X86::getCCMPCondFlagsFromCondCode(X86::CondCode CC) {
3223  // CCMP/CTEST has two conditional operands:
3224  // - SCC: source conditional code (same as CMOV)
3225 // - DCF: destination conditional flags, which has 4 valid bits
3226 //
3227 // +----+----+----+----+
3228 // | OF | SF | ZF | CF |
3229 // +----+----+----+----+
3230 //
3231  // If SCC (the source condition code) evaluates to false, CCMP/CTEST updates
3232  // the conditional flags as follows:
3233 //
3234 // OF = DCF.OF
3235 // SF = DCF.SF
3236 // ZF = DCF.ZF
3237 // CF = DCF.CF
3238 // PF = DCF.CF
3239 // AF = 0 (Auxiliary Carry Flag)
3240 //
3241 // Otherwise, the CMP or TEST is executed and it updates the
3242 // CSPAZO flags normally.
3243 //
3244 // NOTE:
3245 // If SCC = P, then SCC evaluates to true regardless of the CSPAZO value.
3246 // If SCC = NP, then SCC evaluates to false regardless of the CSPAZO value.
3247
3248 enum { CF = 1, ZF = 2, SF = 4, OF = 8, PF = CF };
3249
3250 switch (CC) {
3251 default:
3252 llvm_unreachable("Illegal condition code!");
3253 case X86::COND_NO:
3254 case X86::COND_NE:
3255 case X86::COND_GE:
3256 case X86::COND_G:
3257 case X86::COND_AE:
3258 case X86::COND_A:
3259 case X86::COND_NS:
3260 case X86::COND_NP:
3261 return 0;
3262 case X86::COND_O:
3263 return OF;
3264 case X86::COND_B:
3265 case X86::COND_BE:
3266 return CF;
3267 break;
3268 case X86::COND_E:
3269 case X86::COND_LE:
3270 return ZF;
3271 case X86::COND_S:
3272 case X86::COND_L:
3273 return SF;
3274 case X86::COND_P:
3275 return PF;
3276 }
3277}
3278
3279#define GET_X86_NF_TRANSFORM_TABLE
3280#define GET_X86_ND2NONND_TABLE
3281#include "X86GenInstrMapping.inc"
3282
3283static unsigned getNewOpcFromTable(ArrayRef<X86TableEntry> Table,
3284                                   unsigned Opc) {
3285 const auto I = llvm::lower_bound(Table, Opc);
3286 return (I == Table.end() || I->OldOpc != Opc) ? 0U : I->NewOpc;
3287}
3288unsigned X86::getNFVariant(unsigned Opc) {
3289 return getNewOpcFromTable(X86NFTransformTable, Opc);
3290}
3291
3292unsigned X86::getNonNDVariant(unsigned Opc) {
3293 return getNewOpcFromTable(X86ND2NonNDTable, Opc);
3294}
3295
3296/// Return the inverse of the specified condition,
3297/// e.g. turning COND_E to COND_NE.
3298X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
3299  switch (CC) {
3300 default:
3301 llvm_unreachable("Illegal condition code!");
3302 case X86::COND_E:
3303 return X86::COND_NE;
3304 case X86::COND_NE:
3305 return X86::COND_E;
3306 case X86::COND_L:
3307 return X86::COND_GE;
3308 case X86::COND_LE:
3309 return X86::COND_G;
3310 case X86::COND_G:
3311 return X86::COND_LE;
3312 case X86::COND_GE:
3313 return X86::COND_L;
3314 case X86::COND_B:
3315 return X86::COND_AE;
3316 case X86::COND_BE:
3317 return X86::COND_A;
3318 case X86::COND_A:
3319 return X86::COND_BE;
3320 case X86::COND_AE:
3321 return X86::COND_B;
3322 case X86::COND_S:
3323 return X86::COND_NS;
3324 case X86::COND_NS:
3325 return X86::COND_S;
3326 case X86::COND_P:
3327 return X86::COND_NP;
3328 case X86::COND_NP:
3329 return X86::COND_P;
3330 case X86::COND_O:
3331 return X86::COND_NO;
3332 case X86::COND_NO:
3333 return X86::COND_O;
3334 case X86::COND_NE_OR_P:
3335 return X86::COND_E_AND_NP;
3336 case X86::COND_E_AND_NP:
3337 return X86::COND_NE_OR_P;
3338 }
3339}
3340
3341/// Assuming the flags are set by MI(a,b), return the condition code if we
3342/// modify the instructions such that flags are set by MI(b,a).
3343X86::CondCode X86::getSwappedCondition(X86::CondCode CC) {
3344  switch (CC) {
3345 default:
3346 return X86::COND_INVALID;
3347 case X86::COND_E:
3348 return X86::COND_E;
3349 case X86::COND_NE:
3350 return X86::COND_NE;
3351 case X86::COND_L:
3352 return X86::COND_G;
3353 case X86::COND_LE:
3354 return X86::COND_GE;
3355 case X86::COND_G:
3356 return X86::COND_L;
3357 case X86::COND_GE:
3358 return X86::COND_LE;
3359 case X86::COND_B:
3360 return X86::COND_A;
3361 case X86::COND_BE:
3362 return X86::COND_AE;
3363 case X86::COND_A:
3364 return X86::COND_B;
3365 case X86::COND_AE:
3366 return X86::COND_BE;
3367 }
3368}
3369
3370std::pair<X86::CondCode, bool>
3371X86::getX86ConditionCode(CmpInst::Predicate Predicate) {
3372  X86::CondCode CC = X86::COND_INVALID;
3373  bool NeedSwap = false;
3374 switch (Predicate) {
3375 default:
3376 break;
3377 // Floating-point Predicates
3378 case CmpInst::FCMP_UEQ:
3379 CC = X86::COND_E;
3380 break;
3381 case CmpInst::FCMP_OLT:
3382 NeedSwap = true;
3383 [[fallthrough]];
3384 case CmpInst::FCMP_OGT:
3385 CC = X86::COND_A;
3386 break;
3387 case CmpInst::FCMP_OLE:
3388 NeedSwap = true;
3389 [[fallthrough]];
3390 case CmpInst::FCMP_OGE:
3391 CC = X86::COND_AE;
3392 break;
3393 case CmpInst::FCMP_UGT:
3394 NeedSwap = true;
3395 [[fallthrough]];
3396 case CmpInst::FCMP_ULT:
3397 CC = X86::COND_B;
3398 break;
3399 case CmpInst::FCMP_UGE:
3400 NeedSwap = true;
3401 [[fallthrough]];
3402 case CmpInst::FCMP_ULE:
3403 CC = X86::COND_BE;
3404 break;
3405 case CmpInst::FCMP_ONE:
3406 CC = X86::COND_NE;
3407 break;
3408 case CmpInst::FCMP_UNO:
3409 CC = X86::COND_P;
3410 break;
3411 case CmpInst::FCMP_ORD:
3412 CC = X86::COND_NP;
3413 break;
3414 case CmpInst::FCMP_OEQ:
3415 [[fallthrough]];
3416  case CmpInst::FCMP_UNE:
3417    CC = X86::COND_INVALID;
3418    break;
3419
3420 // Integer Predicates
3421 case CmpInst::ICMP_EQ:
3422 CC = X86::COND_E;
3423 break;
3424 case CmpInst::ICMP_NE:
3425 CC = X86::COND_NE;
3426 break;
3427 case CmpInst::ICMP_UGT:
3428 CC = X86::COND_A;
3429 break;
3430 case CmpInst::ICMP_UGE:
3431 CC = X86::COND_AE;
3432 break;
3433 case CmpInst::ICMP_ULT:
3434 CC = X86::COND_B;
3435 break;
3436 case CmpInst::ICMP_ULE:
3437 CC = X86::COND_BE;
3438 break;
3439 case CmpInst::ICMP_SGT:
3440 CC = X86::COND_G;
3441 break;
3442 case CmpInst::ICMP_SGE:
3443 CC = X86::COND_GE;
3444 break;
3445 case CmpInst::ICMP_SLT:
3446 CC = X86::COND_L;
3447 break;
3448 case CmpInst::ICMP_SLE:
3449 CC = X86::COND_LE;
3450 break;
3451 }
3452
3453 return std::make_pair(CC, NeedSwap);
3454}
3455
3456/// Return a cmov opcode for the given register size in bytes, and operand type.
3457unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand,
3458 bool HasNDD) {
3459 switch (RegBytes) {
3460 default:
3461 llvm_unreachable("Illegal register size!");
3462#define GET_ND_IF_ENABLED(OPC) (HasNDD ? OPC##_ND : OPC)
3463 case 2:
3464 return HasMemoryOperand ? GET_ND_IF_ENABLED(X86::CMOV16rm)
3465 : GET_ND_IF_ENABLED(X86::CMOV16rr);
3466 case 4:
3467 return HasMemoryOperand ? GET_ND_IF_ENABLED(X86::CMOV32rm)
3468 : GET_ND_IF_ENABLED(X86::CMOV32rr);
3469 case 8:
3470 return HasMemoryOperand ? GET_ND_IF_ENABLED(X86::CMOV64rm)
3471 : GET_ND_IF_ENABLED(X86::CMOV64rr);
3472 }
3473}
3474
3475/// Get the VPCMP immediate for the given condition.
3476unsigned X86::getVPCMPImmForCond(ISD::CondCode CC) {
3477  switch (CC) {
3478 default:
3479 llvm_unreachable("Unexpected SETCC condition");
3480 case ISD::SETNE:
3481 return 4;
3482 case ISD::SETEQ:
3483 return 0;
3484 case ISD::SETULT:
3485 case ISD::SETLT:
3486 return 1;
3487 case ISD::SETUGT:
3488 case ISD::SETGT:
3489 return 6;
3490 case ISD::SETUGE:
3491 case ISD::SETGE:
3492 return 5;
3493 case ISD::SETULE:
3494 case ISD::SETLE:
3495 return 2;
3496 }
3497}
3498
3499/// Get the VPCMP immediate if the operands are swapped.
3500unsigned X86::getSwappedVPCMPImm(unsigned Imm) {
3501 switch (Imm) {
3502 default:
3503 llvm_unreachable("Unreachable!");
3504 case 0x01:
3505 Imm = 0x06;
3506 break; // LT -> NLE
3507 case 0x02:
3508 Imm = 0x05;
3509 break; // LE -> NLT
3510 case 0x05:
3511 Imm = 0x02;
3512 break; // NLT -> LE
3513 case 0x06:
3514 Imm = 0x01;
3515 break; // NLE -> LT
3516 case 0x00: // EQ
3517 case 0x03: // FALSE
3518 case 0x04: // NE
3519 case 0x07: // TRUE
3520 break;
3521 }
3522
3523 return Imm;
3524}
3525
3526/// Get the VPCOM immediate if the operands are swapped.
3527unsigned X86::getSwappedVPCOMImm(unsigned Imm) {
3528 switch (Imm) {
3529 default:
3530 llvm_unreachable("Unreachable!");
3531 case 0x00:
3532 Imm = 0x02;
3533 break; // LT -> GT
3534 case 0x01:
3535 Imm = 0x03;
3536 break; // LE -> GE
3537 case 0x02:
3538 Imm = 0x00;
3539 break; // GT -> LT
3540 case 0x03:
3541 Imm = 0x01;
3542 break; // GE -> LE
3543 case 0x04: // EQ
3544 case 0x05: // NE
3545 case 0x06: // FALSE
3546 case 0x07: // TRUE
3547 break;
3548 }
3549
3550 return Imm;
3551}
3552
3553/// Get the VCMP immediate if the operands are swapped.
3554unsigned X86::getSwappedVCMPImm(unsigned Imm) {
3555  // Only need the lower 2 bits to distinguish.
3556 switch (Imm & 0x3) {
3557 default:
3558 llvm_unreachable("Unreachable!");
3559 case 0x00:
3560 case 0x03:
3561 // EQ/NE/TRUE/FALSE/ORD/UNORD don't change immediate when commuted.
3562 break;
3563 case 0x01:
3564 case 0x02:
3565 // Need to toggle bits 3:0. Bit 4 stays the same.
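    // e.g. 0x01 (LT_OS) <-> 0x0E (GT_OS) and 0x11 (LT_OQ) <-> 0x1E (GT_OQ).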
3566 Imm ^= 0xf;
3567 break;
3568 }
3569
3570 return Imm;
3571}
3572
3573unsigned X86::getVectorRegisterWidth(const MCOperandInfo &Info) {
3574  if (Info.RegClass == X86::VR128RegClassID ||
3575 Info.RegClass == X86::VR128XRegClassID)
3576 return 128;
3577 if (Info.RegClass == X86::VR256RegClassID ||
3578 Info.RegClass == X86::VR256XRegClassID)
3579 return 256;
3580 if (Info.RegClass == X86::VR512RegClassID)
3581 return 512;
3582 llvm_unreachable("Unknown register class!");
3583}
3584
3585/// Return true if the Reg is X87 register.
3586static bool isX87Reg(unsigned Reg) {
3587 return (Reg == X86::FPCW || Reg == X86::FPSW ||
3588 (Reg >= X86::ST0 && Reg <= X86::ST7));
3589}
3590
3591/// Check if the instruction is an X87 instruction.
3592bool X86::isX87Instruction(MachineInstr &MI) {
3593  // Calls and inline asm implicitly define X87 registers, so we special-case
3594  // them here; otherwise they would be incorrectly flagged as x87
3595  // instructions.
3596 if (MI.isCall() || MI.isInlineAsm())
3597 return false;
3598 for (const MachineOperand &MO : MI.operands()) {
3599 if (!MO.isReg())
3600 continue;
3601 if (isX87Reg(MO.getReg()))
3602 return true;
3603 }
3604 return false;
3605}
3606
3607int X86::getFirstAddrOperandIdx(const MachineInstr &MI) {
3608  auto IsMemOp = [](const MCOperandInfo &OpInfo) {
3609 return OpInfo.OperandType == MCOI::OPERAND_MEMORY;
3610 };
3611
3612 const MCInstrDesc &Desc = MI.getDesc();
3613
3614 // Directly invoke the MC-layer routine for real (i.e., non-pseudo)
3615 // instructions (fast case).
3616 if (!X86II::isPseudo(Desc.TSFlags)) {
3617 int MemRefIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
3618 if (MemRefIdx >= 0)
3619 return MemRefIdx + X86II::getOperandBias(Desc);
3620#ifdef EXPENSIVE_CHECKS
3621 assert(none_of(Desc.operands(), IsMemOp) &&
3622 "Got false negative from X86II::getMemoryOperandNo()!");
3623#endif
3624 return -1;
3625 }
3626
3627 // Otherwise, handle pseudo instructions by examining the type of their
3628 // operands (slow case). An instruction cannot have a memory reference if it
3629 // has fewer than AddrNumOperands (= 5) explicit operands.
3630 unsigned NumOps = Desc.getNumOperands();
3631 if (NumOps < X86::AddrNumOperands) {
3632#ifdef EXPENSIVE_CHECKS
3633 assert(none_of(Desc.operands(), IsMemOp) &&
3634 "Expected no operands to have OPERAND_MEMORY type!");
3635#endif
3636 return -1;
3637 }
3638
3639 // The first operand with type OPERAND_MEMORY indicates the start of a memory
3640 // reference. We expect the following AddrNumOperand-1 operands to also have
3641 // OPERAND_MEMORY type.
3642 for (unsigned I = 0, E = NumOps - X86::AddrNumOperands; I != E; ++I) {
3643 if (IsMemOp(Desc.operands()[I])) {
3644#ifdef EXPENSIVE_CHECKS
3645 assert(std::all_of(Desc.operands().begin() + I,
3646 Desc.operands().begin() + I + X86::AddrNumOperands,
3647 IsMemOp) &&
3648 "Expected all five operands in the memory reference to have "
3649 "OPERAND_MEMORY type!");
3650#endif
3651 return I;
3652 }
3653 }
3654
3655 return -1;
3656}
3657
3658const Constant *X86::getConstantFromPool(const MachineInstr &MI,
3659                                         unsigned OpNo) {
3660 assert(MI.getNumOperands() >= (OpNo + X86::AddrNumOperands) &&
3661 "Unexpected number of operands!");
3662
3663 const MachineOperand &Index = MI.getOperand(OpNo + X86::AddrIndexReg);
3664 if (!Index.isReg() || Index.getReg() != X86::NoRegister)
3665 return nullptr;
3666
3667 const MachineOperand &Disp = MI.getOperand(OpNo + X86::AddrDisp);
3668 if (!Disp.isCPI() || Disp.getOffset() != 0)
3669 return nullptr;
3670
3671  ArrayRef<MachineConstantPoolEntry> Constants =
3672      MI.getParent()->getParent()->getConstantPool()->getConstants();
3673 const MachineConstantPoolEntry &ConstantEntry = Constants[Disp.getIndex()];
3674
3675 // Bail if this is a machine constant pool entry, we won't be able to dig out
3676 // anything useful.
3677 if (ConstantEntry.isMachineConstantPoolEntry())
3678 return nullptr;
3679
3680 return ConstantEntry.Val.ConstVal;
3681}
3682
3683bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
3684  switch (MI.getOpcode()) {
3685 case X86::TCRETURNdi:
3686 case X86::TCRETURNri:
3687 case X86::TCRETURNmi:
3688 case X86::TCRETURNdi64:
3689 case X86::TCRETURNri64:
3690 case X86::TCRETURNmi64:
3691 return true;
3692 default:
3693 return false;
3694 }
3695}
3696
3697bool X86InstrInfo::canMakeTailCallConditional(
3698 SmallVectorImpl<MachineOperand> &BranchCond,
3699 const MachineInstr &TailCall) const {
3700
3701 const MachineFunction *MF = TailCall.getMF();
3702
3703 if (MF->getTarget().getCodeModel() == CodeModel::Kernel) {
3704 // The kernel patches thunk calls at runtime; these should never be conditional.
3705 const MachineOperand &Target = TailCall.getOperand(0);
3706 if (Target.isSymbol()) {
3707 StringRef Symbol(Target.getSymbolName());
3708 // This is currently only relevant to the r11/kernel indirect thunk.
3709 if (Symbol == "__x86_indirect_thunk_r11")
3710 return false;
3711 }
3712 }
3713
3714 if (TailCall.getOpcode() != X86::TCRETURNdi &&
3715 TailCall.getOpcode() != X86::TCRETURNdi64) {
3716 // Only direct calls can be done with a conditional branch.
3717 return false;
3718 }
3719
3720 if (Subtarget.isTargetWin64() && MF->hasWinCFI()) {
3721 // Conditional tail calls confuse the Win64 unwinder.
3722 return false;
3723 }
3724
3725 assert(BranchCond.size() == 1);
3726 if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
3727 // Can't make a conditional tail call with this condition.
3728 return false;
3729 }
3730
3731 const X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
3732 if (X86FI->getTCReturnAddrDelta() != 0 ||
3733 TailCall.getOperand(1).getImm() != 0) {
3734 // A conditional tail call cannot do any stack adjustment.
3735 return false;
3736 }
3737
3738 return true;
3739}
3740
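// Illustrative sketch of the rewrite performed below: when the checks in
// canMakeTailCallConditional succeed, a conditional branch such as
//   JCC_1 %bb.tail, 4 /* COND_E */
// whose target block ends in a direct tail call
//   TCRETURNdi64 @callee, 0, ...
// is replaced, in place of the branch, by the predicated form
//   TCRETURNdi64cc @callee, 0, 4 /* COND_E */, ...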
3741void X86InstrInfo::replaceBranchWithTailCall(
3742 MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
3743 const MachineInstr &TailCall) const {
3744 assert(canMakeTailCallConditional(BranchCond, TailCall));
3745
3747 while (I != MBB.begin()) {
3748 --I;
3749 if (I->isDebugInstr())
3750 continue;
3751 if (!I->isBranch())
3752 assert(0 && "Can't find the branch to replace!");
3753
3754 X86::CondCode CC = X86::getCondFromBranch(*I);
3755 assert(BranchCond.size() == 1);
3756 if (CC != BranchCond[0].getImm())
3757 continue;
3758
3759 break;
3760 }
3761
3762 unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
3763 : X86::TCRETURNdi64cc;
3764
3765 auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
3766 MIB->addOperand(TailCall.getOperand(0)); // Destination.
3767 MIB.addImm(0); // Stack offset (not used).
3768 MIB->addOperand(BranchCond[0]); // Condition.
3769 MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.
3770
3771 // Add implicit uses and defs of all live regs potentially clobbered by the
3772 // call. This way they still appear live across the call.
3773 LivePhysRegs LiveRegs(getRegisterInfo());
3774 LiveRegs.addLiveOuts(MBB);
3776 LiveRegs.stepForward(*MIB, Clobbers);
3777 for (const auto &C : Clobbers) {
3778 MIB.addReg(C.first, RegState::Implicit);
3780 }
3781
3782 I->eraseFromParent();
3783}
3784
3785// Given an MBB and its TBB, find the FBB which was a fallthrough MBB (it may
3786// not be a fallthrough MBB now due to layout changes). Return nullptr if the
3787// fallthrough MBB cannot be identified.
3790 // Look for non-EHPad successors other than TBB. If we find exactly one, it
3791 // is the fallthrough MBB. If we find zero, then TBB is both the target MBB
3792 // and fallthrough MBB. If we find more than one, we cannot identify the
3793 // fallthrough MBB and should return nullptr.
3794 MachineBasicBlock *FallthroughBB = nullptr;
3795 for (MachineBasicBlock *Succ : MBB->successors()) {
3796 if (Succ->isEHPad() || (Succ == TBB && FallthroughBB))
3797 continue;
3798 // Return a nullptr if we found more than one fallthrough successor.
3799 if (FallthroughBB && FallthroughBB != TBB)
3800 return nullptr;
3801 FallthroughBB = Succ;
3802 }
3803 return FallthroughBB;
3804}
3805
3806bool X86InstrInfo::analyzeBranchImpl(
3809 SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const {
3810
3811 // Start from the bottom of the block and work up, examining the
3812 // terminator instructions.
3813 MachineBasicBlock::iterator I = MBB.end();
3814 MachineBasicBlock::iterator UnCondBrIter = MBB.end();
3815 while (I != MBB.begin()) {
3816 --I;
3817 if (I->isDebugInstr())
3818 continue;
3819
3820 // Working from the bottom, when we see a non-terminator instruction, we're
3821 // done.
3822 if (!isUnpredicatedTerminator(*I))
3823 break;
3824
3825 // A terminator that isn't a branch can't easily be handled by this
3826 // analysis.
3827 if (!I->isBranch())
3828 return true;
3829
3830 // Handle unconditional branches.
3831 if (I->getOpcode() == X86::JMP_1) {
3832 UnCondBrIter = I;
3833
3834 if (!AllowModify) {
3835 TBB = I->getOperand(0).getMBB();
3836 continue;
3837 }
3838
3839 // If the block has any instructions after a JMP, delete them.
3840 MBB.erase(std::next(I), MBB.end());
3841
3842 Cond.clear();
3843 FBB = nullptr;
3844
3845 // Delete the JMP if it's equivalent to a fall-through.
3846 if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
3847 TBB = nullptr;
3848 I->eraseFromParent();
3849 I = MBB.end();
3850 UnCondBrIter = MBB.end();
3851 continue;
3852 }
3853
3854 // TBB is used to indicate the unconditional destination.
3855 TBB = I->getOperand(0).getMBB();
3856 continue;
3857 }
3858
3859 // Handle conditional branches.
3860 X86::CondCode BranchCode = X86::getCondFromBranch(*I);
3861 if (BranchCode == X86::COND_INVALID)
3862 return true; // Can't handle indirect branch.
3863
3864 // In practice we should never have an undef EFLAGS operand; if we do,
3865 // abort here, as we are not prepared to preserve the flags.
3866 if (I->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->isUndef())
3867 return true;
3868
3869 // Working from the bottom, handle the first conditional branch.
3870 if (Cond.empty()) {
3871 FBB = TBB;
3872 TBB = I->getOperand(0).getMBB();
3873 Cond.push_back(MachineOperand::CreateImm(BranchCode));
3874 CondBranches.push_back(&*I);
3875 continue;
3876 }
3877
3878 // Handle subsequent conditional branches. Only handle the case where all
3879 // conditional branches branch to the same destination and their condition
3880 // opcodes fit one of the special multi-branch idioms.
3881 assert(Cond.size() == 1);
3882 assert(TBB);
3883
3884 // If the conditions are the same, we can leave them alone.
3885 X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
3886 auto NewTBB = I->getOperand(0).getMBB();
3887 if (OldBranchCode == BranchCode && TBB == NewTBB)
3888 continue;
3889
3890 // If they differ, see if they fit one of the known patterns. Theoretically,
3891 // we could handle more patterns here, but we shouldn't expect to see them
3892 // if instruction selection has done a reasonable job.
3893 if (TBB == NewTBB &&
3894 ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) ||
3895 (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) {
3896 BranchCode = X86::COND_NE_OR_P;
3897 } else if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) ||
3898 (OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) {
3899 if (NewTBB != (FBB ? FBB : getFallThroughMBB(&MBB, TBB)))
3900 return true;
3901
3902 // X86::COND_E_AND_NP usually has two different branch destinations.
3903 //
3904 // JP B1
3905 // JE B2
3906 // JMP B1
3907 // B1:
3908 // B2:
3909 //
3910 // Here this condition branches to B2 only if NP && E. It has another
3911 // equivalent form:
3912 //
3913 // JNE B1
3914 // JNP B2
3915 // JMP B1
3916 // B1:
3917 // B2:
3918 //
3919 // Similarly, it branches to B2 only if E && NP. That is why this condition
3920 // is named COND_E_AND_NP.
3921 BranchCode = X86::COND_E_AND_NP;
3922 } else
3923 return true;
3924
3925 // Update the MachineOperand.
3926 Cond[0].setImm(BranchCode);
3927 CondBranches.push_back(&*I);
3928 }
3929
3930 return false;
3931}
3932
3935 MachineBasicBlock *&FBB,
3937 bool AllowModify) const {
3938 SmallVector<MachineInstr *, 4> CondBranches;
3939 return analyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify);
3940}
3941
3942static int getJumpTableIndexFromAddr(const MachineInstr &MI) {
3943 const MCInstrDesc &Desc = MI.getDesc();
3944 int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
3945 assert(MemRefBegin >= 0 && "instr should have memory operand");
3946 MemRefBegin += X86II::getOperandBias(Desc);
3947
3948 const MachineOperand &MO = MI.getOperand(MemRefBegin + X86::AddrDisp);
3949 if (!MO.isJTI())
3950 return -1;
3951
3952 return MO.getIndex();
3953}
3954
3955static int getJumpTableIndexFromReg(const MachineRegisterInfo &MRI,
3956 Register Reg) {
3957 if (!Reg.isVirtual())
3958 return -1;
3959 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
3960 if (MI == nullptr)
3961 return -1;
3962 unsigned Opcode = MI->getOpcode();
3963 if (Opcode != X86::LEA64r && Opcode != X86::LEA32r)
3964 return -1;
3965 return getJumpTableIndexFromAddr(*MI);
3966}
3967
3968int X86InstrInfo::getJumpTableIndex(const MachineInstr &MI) const {
3969 unsigned Opcode = MI.getOpcode();
3970 // Switch-jump pattern for non-PIC code looks like:
3971 // JMP64m $noreg, 8, %X, %jump-table.X, $noreg
3972 if (Opcode == X86::JMP64m || Opcode == X86::JMP32m) {
3973 return getJumpTableIndexFromAddr(MI);
3974 }
3975 // The pattern for PIC code looks like:
3976 // %0 = LEA64r $rip, 1, $noreg, %jump-table.X
3977 // %1 = MOVSX64rm32 %0, 4, XX, 0, $noreg
3978 // %2 = ADD64rr %1, %0
3979 // JMP64r %2
3980 if (Opcode == X86::JMP64r || Opcode == X86::JMP32r) {
3981 Register Reg = MI.getOperand(0).getReg();
3982 if (!Reg.isVirtual())
3983 return -1;
3984 const MachineFunction &MF = *MI.getParent()->getParent();
3985 const MachineRegisterInfo &MRI = MF.getRegInfo();
3986 MachineInstr *Add = MRI.getUniqueVRegDef(Reg);
3987 if (Add == nullptr)
3988 return -1;
3989 if (Add->getOpcode() != X86::ADD64rr && Add->getOpcode() != X86::ADD32rr)
3990 return -1;
3991 int JTI1 = getJumpTableIndexFromReg(MRI, Add->getOperand(1).getReg());
3992 if (JTI1 >= 0)
3993 return JTI1;
3994 int JTI2 = getJumpTableIndexFromReg(MRI, Add->getOperand(2).getReg());
3995 if (JTI2 >= 0)
3996 return JTI2;
3997 }
3998 return -1;
3999}
4000
4002 MachineBranchPredicate &MBP,
4003 bool AllowModify) const {
4004 using namespace std::placeholders;
4005
4007 SmallVector<MachineInstr *, 4> CondBranches;
4008 if (analyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches,
4009 AllowModify))
4010 return true;
4011
4012 if (Cond.size() != 1)
4013 return true;
4014
4015 assert(MBP.TrueDest && "expected!");
4016
4017 if (!MBP.FalseDest)
4018 MBP.FalseDest = MBB.getNextNode();
4019
4021
4022 MachineInstr *ConditionDef = nullptr;
4023 bool SingleUseCondition = true;
4024
4026 if (MI.modifiesRegister(X86::EFLAGS, TRI)) {
4027 ConditionDef = &MI;
4028 break;
4029 }
4030
4031 if (MI.readsRegister(X86::EFLAGS, TRI))
4032 SingleUseCondition = false;
4033 }
4034
4035 if (!ConditionDef)
4036 return true;
4037
4038 if (SingleUseCondition) {
4039 for (auto *Succ : MBB.successors())
4040 if (Succ->isLiveIn(X86::EFLAGS))
4041 SingleUseCondition = false;
4042 }
4043
4044 MBP.ConditionDef = ConditionDef;
4045 MBP.SingleUseCondition = SingleUseCondition;
4046
4047 // Currently we only recognize the simple pattern:
4048 //
4049 // test %reg, %reg
4050 // je %label
4051 //
4052 const unsigned TestOpcode =
4053 Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;
4054
4055 if (ConditionDef->getOpcode() == TestOpcode &&
4056 ConditionDef->getNumOperands() == 3 &&
4057 ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) &&
4058 (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) {
4059 MBP.LHS = ConditionDef->getOperand(0);
4060 MBP.RHS = MachineOperand::CreateImm(0);
4061 MBP.Predicate = Cond[0].getImm() == X86::COND_NE
4062 ? MachineBranchPredicate::PRED_NE
4063 : MachineBranchPredicate::PRED_EQ;
4064 return false;
4065 }
4066
4067 return true;
4068}
4069
4071 int *BytesRemoved) const {
4072 assert(!BytesRemoved && "code size not handled");
4073
4075 unsigned Count = 0;
4076
4077 while (I != MBB.begin()) {
4078 --I;
4079 if (I->isDebugInstr())
4080 continue;
4081 if (I->getOpcode() != X86::JMP_1 &&
4083 break;
4084 // Remove the branch.
4085 I->eraseFromParent();
4086 I = MBB.end();
4087 ++Count;
4088 }
4089
4090 return Count;
4091}
4092
4095 MachineBasicBlock *FBB,
4097 const DebugLoc &DL, int *BytesAdded) const {
4098 // Shouldn't be a fall through.
4099 assert(TBB && "insertBranch must not be told to insert a fallthrough");
4100 assert((Cond.size() == 1 || Cond.size() == 0) &&
4101 "X86 branch conditions have one component!");
4102 assert(!BytesAdded && "code size not handled");
4103
4104 if (Cond.empty()) {
4105 // Unconditional branch?
4106 assert(!FBB && "Unconditional branch with multiple successors!");
4107 BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(TBB);
4108 return 1;
4109 }
4110
4111 // If FBB is null, it is implied to be a fall-through block.
4112 bool FallThru = FBB == nullptr;
4113
4114 // Conditional branch.
4115 unsigned Count = 0;
4116 X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
4117 switch (CC) {
4118 case X86::COND_NE_OR_P:
4119 // Synthesize NE_OR_P with two branches.
4120 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NE);
4121 ++Count;
4122 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_P);
4123 ++Count;
4124 break;
4125 case X86::COND_E_AND_NP:
4126 // Use the next block of MBB as FBB if it is null.
4127 if (FBB == nullptr) {
4128 FBB = getFallThroughMBB(&MBB, TBB);
4129 assert(FBB && "MBB cannot be the last block in function when the false "
4130 "body is a fall-through.");
4131 }
4132 // Synthesize COND_E_AND_NP with two branches.
4133 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(FBB).addImm(X86::COND_NE);
4134 ++Count;
4135 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(X86::COND_NP);
4136 ++Count;
4137 break;
4138 default: {
4139 BuildMI(&MBB, DL, get(X86::JCC_1)).addMBB(TBB).addImm(CC);
4140 ++Count;
4141 }
4142 }
4143 if (!FallThru) {
4144 // Two-way conditional branch. Insert the second branch.
4145 BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB);
4146 ++Count;
4147 }
4148 return Count;
4149}
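// Illustrative example: inserting a branch on X86::COND_NE_OR_P to %bb.T
// emits the two-branch sequence
//   JCC_1 %bb.T, 5 /* COND_NE */
//   JCC_1 %bb.T, 10 /* COND_P */
// and, when an explicit false block %bb.F is given, a trailing JMP_1 %bb.F.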
4150
4153 Register DstReg, Register TrueReg,
4154 Register FalseReg, int &CondCycles,
4155 int &TrueCycles, int &FalseCycles) const {
4156 // Not all subtargets have cmov instructions.
4157 if (!Subtarget.canUseCMOV())
4158 return false;
4159 if (Cond.size() != 1)
4160 return false;
4161 // We cannot do the composite conditions, at least not in SSA form.
4162 if ((X86::CondCode)Cond[0].getImm() > X86::LAST_VALID_COND)
4163 return false;
4164
4165 // Check register classes.
4167 const TargetRegisterClass *RC =
4168 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
4169 if (!RC)
4170 return false;
4171
4172 // We have cmov instructions for 16-, 32-, and 64-bit general-purpose registers.
4173 if (X86::GR16RegClass.hasSubClassEq(RC) ||
4174 X86::GR32RegClass.hasSubClassEq(RC) ||
4175 X86::GR64RegClass.hasSubClassEq(RC)) {
4176 // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
4177 // Bridge. Probably Ivy Bridge as well.
4178 CondCycles = 2;
4179 TrueCycles = 2;
4180 FalseCycles = 2;
4181 return true;
4182 }
4183
4184 // Can't do vectors.
4185 return false;
4186}
4187
4190 const DebugLoc &DL, Register DstReg,
4192 Register FalseReg) const {
4194 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
4195 const TargetRegisterClass &RC = *MRI.getRegClass(DstReg);
4196 assert(Cond.size() == 1 && "Invalid Cond array");
4197 unsigned Opc =
4198 X86::getCMovOpcode(TRI.getRegSizeInBits(RC) / 8,
4199 false /*HasMemoryOperand*/, Subtarget.hasNDD());
4200 BuildMI(MBB, I, DL, get(Opc), DstReg)
4201 .addReg(FalseReg)
4202 .addReg(TrueReg)
4203 .addImm(Cond[0].getImm());
4204}
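// For illustration: selecting between two GR32 values on X86::COND_E (without
// NDD) emits a single conditional move with the false operand first:
//   %dst = CMOV32rr %false, %true, 4 /* COND_E */, implicit $eflags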
4205
4206/// Test if the given register is a physical h register.
4207static bool isHReg(unsigned Reg) {
4208 return X86::GR8_ABCD_HRegClass.contains(Reg);
4209}
4210
4211// Try and copy between VR128/VR64 and GR64 registers.
4212static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
4213 const X86Subtarget &Subtarget) {
4214 bool HasAVX = Subtarget.hasAVX();
4215 bool HasAVX512 = Subtarget.hasAVX512();
4216 bool HasEGPR = Subtarget.hasEGPR();
4217
4218 // SrcReg(MaskReg) -> DestReg(GR64)
4219 // SrcReg(MaskReg) -> DestReg(GR32)
4220
4221 // All KMASK RegClasses hold the same k registers, so this can be tested
4222 // against any one of them.
4223 if (X86::VK16RegClass.contains(SrcReg)) {
4224 if (X86::GR64RegClass.contains(DestReg)) {
4225 assert(Subtarget.hasBWI());
4226 return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
4227 }
4228 if (X86::GR32RegClass.contains(DestReg))
4229 return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDrk_EVEX : X86::KMOVDrk)
4230 : (HasEGPR ? X86::KMOVWrk_EVEX : X86::KMOVWrk);
4231 }
4232
4233 // SrcReg(GR64) -> DestReg(MaskReg)
4234 // SrcReg(GR32) -> DestReg(MaskReg)
4235
4236 // All KMASK RegClasses hold the same k registers, so this can be tested
4237 // against any one of them.
4238 if (X86::VK16RegClass.contains(DestReg)) {
4239 if (X86::GR64RegClass.contains(SrcReg)) {
4240 assert(Subtarget.hasBWI());
4241 return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
4242 }
4243 if (X86::GR32RegClass.contains(SrcReg))
4244 return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDkr_EVEX : X86::KMOVDkr)
4245 : (HasEGPR ? X86::KMOVWkr_EVEX : X86::KMOVWkr);
4246 }
4247
4248 // SrcReg(VR128) -> DestReg(GR64)
4249 // SrcReg(VR64) -> DestReg(GR64)
4250 // SrcReg(GR64) -> DestReg(VR128)
4251 // SrcReg(GR64) -> DestReg(VR64)
4252
4253 if (X86::GR64RegClass.contains(DestReg)) {
4254 if (X86::VR128XRegClass.contains(SrcReg))
4255 // Copy from a VR128 register to a GR64 register.
4256 return HasAVX512 ? X86::VMOVPQIto64Zrr
4257 : HasAVX ? X86::VMOVPQIto64rr
4258 : X86::MOVPQIto64rr;
4259 if (X86::VR64RegClass.contains(SrcReg))
4260 // Copy from a VR64 register to a GR64 register.
4261 return X86::MMX_MOVD64from64rr;
4262 } else if (X86::GR64RegClass.contains(SrcReg)) {
4263 // Copy from a GR64 register to a VR128 register.
4264 if (X86::VR128XRegClass.contains(DestReg))
4265 return HasAVX512 ? X86::VMOV64toPQIZrr
4266 : HasAVX ? X86::VMOV64toPQIrr
4267 : X86::MOV64toPQIrr;
4268 // Copy from a GR64 register to a VR64 register.
4269 if (X86::VR64RegClass.contains(DestReg))
4270 return X86::MMX_MOVD64to64rr;
4271 }
4272
4273 // SrcReg(VR128) -> DestReg(GR32)
4274 // SrcReg(GR32) -> DestReg(VR128)
4275
4276 if (X86::GR32RegClass.contains(DestReg) &&
4277 X86::VR128XRegClass.contains(SrcReg))
4278 // Copy from a VR128 register to a GR32 register.
4279 return HasAVX512 ? X86::VMOVPDI2DIZrr
4280 : HasAVX ? X86::VMOVPDI2DIrr
4281 : X86::MOVPDI2DIrr;
4282
4283 if (X86::VR128XRegClass.contains(DestReg) &&
4284 X86::GR32RegClass.contains(SrcReg))
4285 // Copy from a GR32 register to a VR128 register.
4286 return HasAVX512 ? X86::VMOVDI2PDIZrr
4287 : HasAVX ? X86::VMOVDI2PDIrr
4288 : X86::MOVDI2PDIrr;
4289 return 0;
4290}
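// For illustration: asking for a copy from $xmm0 to $rax (the GR64 <- VR128
// case above) yields MOVPQIto64rr on SSE2-only targets, VMOVPQIto64rr with
// AVX, and VMOVPQIto64Zrr with AVX512.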
4291
4294 const DebugLoc &DL, MCRegister DestReg,
4295 MCRegister SrcReg, bool KillSrc,
4296 bool RenamableDest, bool RenamableSrc) const {
4297 // First deal with the normal symmetric copies.
4298 bool HasAVX = Subtarget.hasAVX();
4299 bool HasVLX = Subtarget.hasVLX();
4300 bool HasEGPR = Subtarget.hasEGPR();
4301 unsigned Opc = 0;
4302 if (X86::GR64RegClass.contains(DestReg, SrcReg))
4303 Opc = X86::MOV64rr;
4304 else if (X86::GR32RegClass.contains(DestReg, SrcReg))
4305 Opc = X86::MOV32rr;
4306 else if (X86::GR16RegClass.contains(DestReg, SrcReg))
4307 Opc = X86::MOV16rr;
4308 else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
4309 // Copying to or from a physical H register on x86-64 requires a NOREX
4310 // move. Otherwise use a normal move.
4311 if ((isHReg(DestReg) || isHReg(SrcReg)) && Subtarget.is64Bit()) {
4312 Opc = X86::MOV8rr_NOREX;
4313 // Both operands must be encodable without a REX prefix.
4314 assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
4315 "8-bit H register can not be copied outside GR8_NOREX");
4316 } else
4317 Opc = X86::MOV8rr;
4318 } else if (X86::VR64RegClass.contains(DestReg, SrcReg))
4319 Opc = X86::MMX_MOVQ64rr;
4320 else if (X86::VR128XRegClass.contains(DestReg, SrcReg)) {
4321 if (HasVLX)
4322 Opc = X86::VMOVAPSZ128rr;
4323 else if (X86::VR128RegClass.contains(DestReg, SrcReg))
4324 Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
4325 else {
4326 // If this is an extended register and we don't have VLX, we need to use a
4327 // 512-bit move.
4328 Opc = X86::VMOVAPSZrr;
4330 DestReg =
4331 TRI->getMatchingSuperReg(DestReg, X86::sub_xmm, &X86::VR512RegClass);
4332 SrcReg =
4333 TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
4334 }
4335 } else if (X86::VR256XRegClass.contains(DestReg, SrcReg)) {
4336 if (HasVLX)
4337 Opc = X86::VMOVAPSZ256rr;
4338 else if (X86::VR256RegClass.contains(DestReg, SrcReg))
4339 Opc = X86::VMOVAPSYrr;
4340 else {
4341 // If this is an extended register and we don't have VLX, we need to use a
4342 // 512-bit move.
4343 Opc = X86::VMOVAPSZrr;
4345 DestReg =
4346 TRI->getMatchingSuperReg(DestReg, X86::sub_ymm, &X86::VR512RegClass);
4347 SrcReg =
4348 TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
4349 }
4350 } else if (X86::VR512RegClass.contains(DestReg, SrcReg))
4351 Opc = X86::VMOVAPSZrr;
4352 // All KMASK RegClasses hold the same k registers, so this can be tested
4353 // against any one of them.
4354 else if (X86::VK16RegClass.contains(DestReg, SrcReg))
4355 Opc = Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVQkk_EVEX : X86::KMOVQkk)
4356 : (HasEGPR ? X86::KMOVWkk_EVEX : X86::KMOVWkk);
4357 if (!Opc)
4358 Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
4359
4360 if (Opc) {
4361 BuildMI(MBB, MI, DL, get(Opc), DestReg)
4362 .addReg(SrcReg, getKillRegState(KillSrc));
4363 return;
4364 }
4365
4366 if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) {
4367 // FIXME: We use a fatal error here because historically LLVM has tried to
4368 // lower some of these physreg copies and we want to ensure we get
4369 // reasonable bug reports if someone encounters a case no other testing
4370 // found. This path should be removed after the LLVM 7 release.
4371 report_fatal_error("Unable to copy EFLAGS physical register!");
4372 }
4373
4374 LLVM_DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) << " to "
4375 << RI.getName(DestReg) << '\n');
4376 report_fatal_error("Cannot emit physreg copy instruction");
4377}
4378
4379std::optional<DestSourcePair>
4381 if (MI.isMoveReg()) {
4382 // FIXME: Dirty hack for apparent invariant that doesn't hold when
4383 // subreg_to_reg is coalesced with ordinary copies, such that the bits that
4384 // were asserted as 0 are now undef.
4385 if (MI.getOperand(0).isUndef() && MI.getOperand(0).getSubReg())
4386 return std::nullopt;
4387
4388 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
4389 }
4390 return std::nullopt;
4391}
4392
4393static unsigned getLoadStoreOpcodeForFP16(bool Load, const X86Subtarget &STI) {
4394 if (STI.hasFP16())
4395 return Load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
4396 if (Load)
4397 return STI.hasAVX512() ? X86::VMOVSSZrm
4398 : STI.hasAVX() ? X86::VMOVSSrm
4399 : X86::MOVSSrm;
4400 else
4401 return STI.hasAVX512() ? X86::VMOVSSZmr
4402 : STI.hasAVX() ? X86::VMOVSSmr
4403 : X86::MOVSSmr;
4404}
4405
4407 const TargetRegisterClass *RC,
4408 bool IsStackAligned,
4409 const X86Subtarget &STI, bool Load) {
4410 bool HasAVX = STI.hasAVX();
4411 bool HasAVX512 = STI.hasAVX512();
4412 bool HasVLX = STI.hasVLX();
4413 bool HasEGPR = STI.hasEGPR();
4414
4415 assert(RC != nullptr && "Invalid target register class");
4416 switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
4417 default:
4418 llvm_unreachable("Unknown spill size");
4419 case 1:
4420 assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
4421 if (STI.is64Bit())
4422 // Copying to or from a physical H register on x86-64 requires a NOREX
4423 // move. Otherwise use a normal move.
4424 if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
4425 return Load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
4426 return Load ? X86::MOV8rm : X86::MOV8mr;
4427 case 2:
4428 if (X86::VK16RegClass.hasSubClassEq(RC))
4429 return Load ? (HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm)
4430 : (HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
4431 assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
4432 return Load ? X86::MOV16rm : X86::MOV16mr;
4433 case 4:
4434 if (X86::GR32RegClass.hasSubClassEq(RC))
4435 return Load ? X86::MOV32rm : X86::MOV32mr;
4436 if (X86::FR32XRegClass.hasSubClassEq(RC))
4437 return Load ? (HasAVX512 ? X86::VMOVSSZrm_alt
4438 : HasAVX ? X86::VMOVSSrm_alt
4439 : X86::MOVSSrm_alt)
4440 : (HasAVX512 ? X86::VMOVSSZmr
4441 : HasAVX ? X86::VMOVSSmr
4442 : X86::MOVSSmr);
4443 if (X86::RFP32RegClass.hasSubClassEq(RC))
4444 return Load ? X86::LD_Fp32m : X86::ST_Fp32m;
4445 if (X86::VK32RegClass.hasSubClassEq(RC)) {
4446 assert(STI.hasBWI() && "KMOVD requires BWI");
4447 return Load ? (HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm)
4448 : (HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
4449 }
4450 // All of these mask pair classes have the same spill size, so the same kind
4451 // of kmov instructions can be used with all of them.
4452 if (X86::VK1PAIRRegClass.hasSubClassEq(RC) ||
4453 X86::VK2PAIRRegClass.hasSubClassEq(RC) ||
4454 X86::VK4PAIRRegClass.hasSubClassEq(RC) ||
4455 X86::VK8PAIRRegClass.hasSubClassEq(RC) ||
4456 X86::VK16PAIRRegClass.hasSubClassEq(RC))
4457 return Load ? X86::MASKPAIR16LOAD : X86::MASKPAIR16STORE;
4458 if (X86::FR16RegClass.hasSubClassEq(RC) ||
4459 X86::FR16XRegClass.hasSubClassEq(RC))
4460 return getLoadStoreOpcodeForFP16(Load, STI);
4461 llvm_unreachable("Unknown 4-byte regclass");
4462 case 8:
4463 if (X86::GR64RegClass.hasSubClassEq(RC))
4464 return Load ? X86::MOV64rm : X86::MOV64mr;
4465 if (X86::FR64XRegClass.hasSubClassEq(RC))
4466 return Load ? (HasAVX512 ? X86::VMOVSDZrm_alt
4467 : HasAVX ? X86::VMOVSDrm_alt
4468 : X86::MOVSDrm_alt)
4469 : (HasAVX512 ? X86::VMOVSDZmr
4470 : HasAVX ? X86::VMOVSDmr
4471 : X86::MOVSDmr);
4472 if (X86::VR64RegClass.hasSubClassEq(RC))
4473 return Load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
4474 if (X86::RFP64RegClass.hasSubClassEq(RC))
4475 return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
4476 if (X86::VK64RegClass.hasSubClassEq(RC)) {
4477 assert(STI.hasBWI() && "KMOVQ requires BWI");
4478 return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
4479 : (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
4480 }
4481 llvm_unreachable("Unknown 8-byte regclass");
4482 case 10:
4483 assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
4484 return Load ? X86::LD_Fp80m : X86::ST_FpP80m;
4485 case 16: {
4486 if (X86::VR128XRegClass.hasSubClassEq(RC)) {
4487 // If the stack is realigned, we can use aligned stores.
4488 if (IsStackAligned)
4489 return Load ? (HasVLX ? X86::VMOVAPSZ128rm
4490 : HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX
4491 : HasAVX ? X86::VMOVAPSrm
4492 : X86::MOVAPSrm)
4493 : (HasVLX ? X86::VMOVAPSZ128mr
4494 : HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX
4495 : HasAVX ? X86::VMOVAPSmr
4496 : X86::MOVAPSmr);
4497 else
4498 return Load ? (HasVLX ? X86::VMOVUPSZ128rm
4499 : HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX
4500 : HasAVX ? X86::VMOVUPSrm
4501 : X86::MOVUPSrm)
4502 : (HasVLX ? X86::VMOVUPSZ128mr
4503 : HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX
4504 : HasAVX ? X86::VMOVUPSmr
4505 : X86::MOVUPSmr);
4506 }
4507 llvm_unreachable("Unknown 16-byte regclass");
4508 }
4509 case 32:
4510 assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
4511 // If the stack is realigned, we can use aligned stores.
4512 if (IsStackAligned)
4513 return Load ? (HasVLX ? X86::VMOVAPSZ256rm
4514 : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
4515 : X86::VMOVAPSYrm)
4516 : (HasVLX ? X86::VMOVAPSZ256mr
4517 : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
4518 : X86::VMOVAPSYmr);
4519 else
4520 return Load ? (HasVLX ? X86::VMOVUPSZ256rm
4521 : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
4522 : X86::VMOVUPSYrm)
4523 : (HasVLX ? X86::VMOVUPSZ256mr
4524 : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
4525 : X86::VMOVUPSYmr);
4526 case 64:
4527 assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
4528 assert(STI.hasAVX512() && "Using 512-bit register requires AVX512");
4529 if (IsStackAligned)
4530 return Load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
4531 else
4532 return Load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
4533 case 1024:
4534 assert(X86::TILERegClass.hasSubClassEq(RC) && "Unknown 1024-byte regclass");
4535 assert(STI.hasAMXTILE() && "Using 8*1024-bit register requires AMX-TILE");
4536#define GET_EGPR_IF_ENABLED(OPC) (STI.hasEGPR() ? OPC##_EVEX : OPC)
4537 return Load ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
4538 : GET_EGPR_IF_ENABLED(X86::TILESTORED);
4539#undef GET_EGPR_IF_ENABLED
4540 case 2048:
4541 assert(X86::TILEPAIRRegClass.hasSubClassEq(RC) &&
4542 "Unknown 2048-byte regclass");
4543 assert(STI.hasAMXTILE() && "Using 2048-bit register requires AMX-TILE");
4544 return Load ? X86::PTILEPAIRLOAD : X86::PTILEPAIRSTORE;
4545 }
4546}
4547
4548std::optional<ExtAddrMode>
4550 const TargetRegisterInfo *TRI) const {
4551 const MCInstrDesc &Desc = MemI.getDesc();
4552 int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
4553 if (MemRefBegin < 0)
4554 return std::nullopt;
4555
4556 MemRefBegin += X86II::getOperandBias(Desc);
4557
4558 auto &BaseOp = MemI.getOperand(MemRefBegin + X86::AddrBaseReg);
4559 if (!BaseOp.isReg()) // Can be an MO_FrameIndex
4560 return std::nullopt;
4561
4562 const MachineOperand &DispMO = MemI.getOperand(MemRefBegin + X86::AddrDisp);
4563 // Displacement can be symbolic
4564 if (!DispMO.isImm())
4565 return std::nullopt;
4566
4567 ExtAddrMode AM;
4568 AM.BaseReg = BaseOp.getReg();
4569 AM.ScaledReg = MemI.getOperand(MemRefBegin + X86::AddrIndexReg).getReg();
4570 AM.Scale = MemI.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm();
4571 AM.Displacement = DispMO.getImm();
4572 return AM;
4573}
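// Illustrative example: for "%v = MOV32rm $rbp, 1, $noreg, -8, $noreg" the
// returned ExtAddrMode has BaseReg = $rbp, ScaledReg = $noreg, Scale = 1 and
// Displacement = -8.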
4574
4576 StringRef &ErrInfo) const {
4577 std::optional<ExtAddrMode> AMOrNone = getAddrModeFromMemoryOp(MI, nullptr);
4578 if (!AMOrNone)
4579 return true;
4580
4581 ExtAddrMode AM = *AMOrNone;
4583 if (AM.ScaledReg != X86::NoRegister) {
4584 switch (AM.Scale) {
4585 case 1:
4586 case 2:
4587 case 4:
4588 case 8:
4589 break;
4590 default:
4591 ErrInfo = "Scale factor in address must be 1, 2, 4 or 8";
4592 return false;
4593 }
4594 }
4595 if (!isInt<32>(AM.Displacement)) {
4596 ErrInfo = "Displacement in address must fit into 32-bit signed "
4597 "integer";
4598 return false;
4599 }
4600
4601 return true;
4602}
4603
4605 const Register Reg,
4606 int64_t &ImmVal) const {
4607 Register MovReg = Reg;
4608 const MachineInstr *MovMI = &MI;
4609
4610 // Follow use-def for SUBREG_TO_REG to find the real move immediate
4611 // instruction. It is quite common for x86-64.
4612 if (MI.isSubregToReg()) {
4613 // We use the following pattern to set up a 64-bit immediate.
4614 // %8:gr32 = MOV32r0 implicit-def dead $eflags
4615 // %6:gr64 = SUBREG_TO_REG 0, killed %8:gr32, %subreg.sub_32bit
4616 if (!MI.getOperand(1).isImm())
4617 return false;
4618 unsigned FillBits = MI.getOperand(1).getImm();
4619 unsigned SubIdx = MI.getOperand(3).getImm();
4620 MovReg = MI.getOperand(2).getReg();
4621 if (SubIdx != X86::sub_32bit || FillBits != 0)
4622 return false;
4623 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4624 MovMI = MRI.getUniqueVRegDef(MovReg);
4625 if (!MovMI)
4626 return false;
4627 }
4628
4629 if (MovMI->getOpcode() == X86::MOV32r0 &&
4630 MovMI->getOperand(0).getReg() == MovReg) {
4631 ImmVal = 0;
4632 return true;
4633 }
4634
4635 if (MovMI->getOpcode() != X86::MOV32ri &&
4636 MovMI->getOpcode() != X86::MOV64ri &&
4637 MovMI->getOpcode() != X86::MOV32ri64 && MovMI->getOpcode() != X86::MOV8ri)
4638 return false;
4639 // The MOV source operand can be a global address rather than an immediate.
4640 if (!MovMI->getOperand(1).isImm() || MovMI->getOperand(0).getReg() != MovReg)
4641 return false;
4642 ImmVal = MovMI->getOperand(1).getImm();
4643 return true;
4644}
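// For illustration: given "%r = MOV32ri 42", querying this hook for %r yields
// ImmVal == 42, while the SUBREG_TO_REG(0, MOV32r0, sub_32bit) pattern handled
// above yields ImmVal == 0.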
4645
4647 const MachineInstr *MI, const Register NullValueReg,
4648 const TargetRegisterInfo *TRI) const {
4649 if (!MI->modifiesRegister(NullValueReg, TRI))
4650 return true;
4651 switch (MI->getOpcode()) {
4652 // Shifting a null value right/left onto itself still yields a null, i.e.
4653 // rax = shl rax, X.
4654 case X86::SHR64ri:
4655 case X86::SHR32ri:
4656 case X86::SHL64ri:
4657 case X86::SHL32ri:
4658 assert(MI->getOperand(0).isDef() && MI->getOperand(1).isUse() &&
4659 "expected for shift opcode!");
4660 return MI->getOperand(0).getReg() == NullValueReg &&
4661 MI->getOperand(1).getReg() == NullValueReg;
4662 // Zero extend of a sub-reg of NullValueReg into itself does not change the
4663 // null value.
4664 case X86::MOV32rr:
4665 return llvm::all_of(MI->operands(), [&](const MachineOperand &MO) {
4666 return TRI->isSubRegisterEq(NullValueReg, MO.getReg());
4667 });
4668 default:
4669 return false;
4670 }
4671 llvm_unreachable("Should be handled above!");
4672}
4673
4676 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
4677 const TargetRegisterInfo *TRI) const {
4678 const MCInstrDesc &Desc = MemOp.getDesc();
4679 int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
4680 if (MemRefBegin < 0)
4681 return false;
4682
4683 MemRefBegin += X86II::getOperandBias(Desc);
4684
4685 const MachineOperand *BaseOp =
4686 &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
4687 if (!BaseOp->isReg()) // Can be an MO_FrameIndex
4688 return false;
4689
4690 if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
4691 return false;
4692
4693 if (MemOp.getOperand(MemRefBegin + X86::AddrIndexReg).getReg() !=
4694 X86::NoRegister)
4695 return false;
4696
4697 const MachineOperand &DispMO = MemOp.getOperand(MemRefBegin + X86::AddrDisp);
4698
4699 // Displacement can be symbolic
4700 if (!DispMO.isImm())
4701 return false;
4702
4703 Offset = DispMO.getImm();
4704
4705 if (!BaseOp->isReg())
4706 return false;
4707
4708 OffsetIsScalable = false;
4709 // FIXME: Relying on memoperands() may not be the right thing to do here. Check
4710 // with X86 maintainers, and fix it accordingly. For now, it is ok, since
4711 // there is no use of `Width` for X86 back-end at the moment.
4712 Width =
4713 !MemOp.memoperands_empty() ? MemOp.memoperands().front()->getSize() : 0;
4714 BaseOps.push_back(BaseOp);
4715 return true;
4716}
4717
4718static unsigned getStoreRegOpcode(Register SrcReg,
4719 const TargetRegisterClass *RC,
4720 bool IsStackAligned,
4721 const X86Subtarget &STI) {
4722 return getLoadStoreRegOpcode(SrcReg, RC, IsStackAligned, STI, false);
4723}
4724
4725static unsigned getLoadRegOpcode(Register DestReg,
4726 const TargetRegisterClass *RC,
4727 bool IsStackAligned, const X86Subtarget &STI) {
4728 return getLoadStoreRegOpcode(DestReg, RC, IsStackAligned, STI, true);
4729}
4730
4731static bool isAMXOpcode(unsigned Opc) {
4732 switch (Opc) {
4733 default:
4734 return false;
4735 case X86::TILELOADD:
4736 case X86::TILESTORED:
4737 case X86::TILELOADD_EVEX:
4738 case X86::TILESTORED_EVEX:
4739 case X86::PTILEPAIRLOAD:
4740 case X86::PTILEPAIRSTORE:
4741 return true;
4742 }
4743}
4744
4747 unsigned Opc, Register Reg, int FrameIdx,
4748 bool isKill) const {
4749 switch (Opc) {
4750 default:
4751 llvm_unreachable("Unexpected special opcode!");
4752 case X86::TILESTORED:
4753 case X86::TILESTORED_EVEX:
4754 case X86::PTILEPAIRSTORE: {
4755 // tilestored %tmm, (%sp, %idx)
4757 Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
4758 BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
4759 MachineInstr *NewMI =
4760 addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
4761 .addReg(Reg, getKillRegState(isKill));
4763 MO.setReg(VirtReg);
4764 MO.setIsKill(true);
4765 break;
4766 }
4767 case X86::TILELOADD:
4768 case X86::TILELOADD_EVEX:
4769 case X86::PTILEPAIRLOAD: {
4770 // tileloadd (%sp, %idx), %tmm
4772 Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
4773 BuildMI(MBB, MI, DebugLoc(), get(X86::MOV64ri), VirtReg).addImm(64);
4775 BuildMI(MBB, MI, DebugLoc(), get(Opc), Reg), FrameIdx);
4777 MO.setReg(VirtReg);
4778 MO.setIsKill(true);
4779 break;
4780 }
4781 }
4782}
4783
4786 bool isKill, int FrameIdx, const TargetRegisterClass *RC,
4787 const TargetRegisterInfo *TRI, Register VReg) const {
4788 const MachineFunction &MF = *MBB.getParent();
4789 const MachineFrameInfo &MFI = MF.getFrameInfo();
4790 assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
4791 "Stack slot too small for store");
4792
4793 unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
4794 bool isAligned =
4795 (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
4796 (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
4797
4798 unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
4799 if (isAMXOpcode(Opc))
4800 loadStoreTileReg(MBB, MI, Opc, SrcReg, FrameIdx, isKill);
4801 else
4802 addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
4803 .addReg(SrcReg, getKillRegState(isKill));
4804}
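// For illustration: spilling a GR64 register to frame index 0 on a
// sufficiently aligned stack emits
//   MOV64mr %stack.0, 1, $noreg, 0, $noreg, %reg
// i.e. the frame reference supplies the base/scale/index/disp/segment operands
// and the value to store is appended last.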
4805
4808 Register DestReg, int FrameIdx,
4809 const TargetRegisterClass *RC,
4810 const TargetRegisterInfo *TRI,
4811 Register VReg) const {
4812 const MachineFunction &MF = *MBB.getParent();
4813 const MachineFrameInfo &MFI = MF.getFrameInfo();
4814 assert(MFI.getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
4815 "Load size exceeds stack slot");
4816 unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
4817 bool isAligned =
4818 (Subtarget.getFrameLowering()->getStackAlign() >= Alignment) ||
4819 (RI.canRealignStack(MF) && !MFI.isFixedObjectIndex(FrameIdx));
4820
4821 unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
4822 if (isAMXOpcode(Opc))
4823 loadStoreTileReg(MBB, MI, Opc, DestReg, FrameIdx);
4824 else
4825 addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg),
4826 FrameIdx);
4827}
4828
4830 Register &SrcReg2, int64_t &CmpMask,
4831 int64_t &CmpValue) const {
4832 switch (MI.getOpcode()) {
4833 default:
4834 break;
4835 case X86::CMP64ri32:
4836 case X86::CMP32ri:
4837 case X86::CMP16ri:
4838 case X86::CMP8ri:
4839 SrcReg = MI.getOperand(0).getReg();
4840 SrcReg2 = 0;
4841 if (MI.getOperand(1).isImm()) {
4842 CmpMask = ~0;
4843 CmpValue = MI.getOperand(1).getImm();
4844 } else {
4845 CmpMask = CmpValue = 0;
4846 }
4847 return true;
4848 // A SUB can be used to perform a comparison.
4849 CASE_ND(SUB64rm)
4850 CASE_ND(SUB32rm)
4851 CASE_ND(SUB16rm)
4852 CASE_ND(SUB8rm)
4853 SrcReg = MI.getOperand(1).getReg();
4854 SrcReg2 = 0;
4855 CmpMask = 0;
4856 CmpValue = 0;
4857 return true;
4858 CASE_ND(SUB64rr)
4859 CASE_ND(SUB32rr)
4860 CASE_ND(SUB16rr)
4861 CASE_ND(SUB8rr)
4862 SrcReg = MI.getOperand(1).getReg();
4863 SrcReg2 = MI.getOperand(2).getReg();
4864 CmpMask = 0;
4865 CmpValue = 0;
4866 return true;
4867 CASE_ND(SUB64ri32)
4868 CASE_ND(SUB32ri)
4869 CASE_ND(SUB16ri)
4870 CASE_ND(SUB8ri)
4871 SrcReg = MI.getOperand(1).getReg();
4872 SrcReg2 = 0;
4873 if (MI.getOperand(2).isImm()) {
4874 CmpMask = ~0;
4875 CmpValue = MI.getOperand(2).getImm();
4876 } else {
4877 CmpMask = CmpValue = 0;
4878 }
4879 return true;
4880 case X86::CMP64rr:
4881 case X86::CMP32rr:
4882 case X86::CMP16rr:
4883 case X86::CMP8rr:
4884 SrcReg = MI.getOperand(0).getReg();
4885 SrcReg2 = MI.getOperand(1).getReg();
4886 CmpMask = 0;
4887 CmpValue = 0;
4888 return true;
4889 case X86::TEST8rr:
4890 case X86::TEST16rr:
4891 case X86::TEST32rr:
4892 case X86::TEST64rr:
4893 SrcReg = MI.getOperand(0).getReg();
4894 if (MI.getOperand(1).getReg() != SrcReg)
4895 return false;
4896 // Compare against zero.
4897 SrcReg2 = 0;
4898 CmpMask = ~0;
4899 CmpValue = 0;
4900 return true;
4901 }
4902 return false;
4903}
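// Illustrative examples of the decomposition above: "CMP32ri %r, 7" yields
// SrcReg = %r, SrcReg2 = 0, CmpMask = ~0, CmpValue = 7, while "CMP32rr %a, %b"
// yields SrcReg = %a, SrcReg2 = %b and CmpMask = CmpValue = 0.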
4904
4905bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
4906 Register SrcReg, Register SrcReg2,
4907 int64_t ImmMask, int64_t ImmValue,
4908 const MachineInstr &OI, bool *IsSwapped,
4909 int64_t *ImmDelta) const {
4910 switch (OI.getOpcode()) {
4911 case X86::CMP64rr:
4912 case X86::CMP32rr:
4913 case X86::CMP16rr:
4914 case X86::CMP8rr:
4915 CASE_ND(SUB64rr)
4916 CASE_ND(SUB32rr)
4917 CASE_ND(SUB16rr)
4918 CASE_ND(SUB8rr) {
4919 Register OISrcReg;
4920 Register OISrcReg2;
4921 int64_t OIMask;
4922 int64_t OIValue;
4923 if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
4924 OIMask != ImmMask || OIValue != ImmValue)
4925 return false;
4926 if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
4927 *IsSwapped = false;
4928 return true;
4929 }
4930 if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
4931 *IsSwapped = true;
4932 return true;
4933 }
4934 return false;
4935 }
4936 case X86::CMP64ri32:
4937 case X86::CMP32ri:
4938 case X86::CMP16ri:
4939 case X86::CMP8ri:
4940 CASE_ND(SUB64ri32)
4941 CASE_ND(SUB32ri)
4942 CASE_ND(SUB16ri)
4943 CASE_ND(SUB8ri)
4944 case X86::TEST64rr:
4945 case X86::TEST32rr:
4946 case X86::TEST16rr:
4947 case X86::TEST8rr: {
4948 if (ImmMask != 0) {
4949 Register OISrcReg;
4950 Register OISrcReg2;
4951 int64_t OIMask;
4952 int64_t OIValue;
4953 if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
4954 SrcReg == OISrcReg && ImmMask == OIMask) {
4955 if (OIValue == ImmValue) {
4956 *ImmDelta = 0;
4957 return true;
4958 } else if (static_cast<uint64_t>(ImmValue) ==
4959 static_cast<uint64_t>(OIValue) - 1) {
4960 *ImmDelta = -1;
4961 return true;
4962 } else if (static_cast<uint64_t>(ImmValue) ==
4963 static_cast<uint64_t>(OIValue) + 1) {
4964 *ImmDelta = 1;
4965 return true;
4966 } else {
4967 return false;
4968 }
4969 }
4970 }
4971 return FlagI.isIdenticalTo(OI);
4972 }
4973 default:
4974 return false;
4975 }
4976}
4977
4978/// Check whether the definition can be converted
4979/// to remove a comparison against zero.
4980inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
4981 bool &ClearsOverflowFlag) {
4982 NoSignFlag = false;
4983 ClearsOverflowFlag = false;
4984
4985 // "ELF Handling for Thread-Local Storage" specifies that x86-64 GOTTPOFF, and
4986 // i386 GOTNTPOFF/INDNTPOFF relocations can convert an ADD to a LEA during
4987 // Initial Exec to Local Exec relaxation. In these cases, we must not depend
4988 // on the EFLAGS modification of ADD actually happening in the final binary.
4989 if (MI.getOpcode() == X86::ADD64rm || MI.getOpcode() == X86::ADD32rm) {
4990 unsigned Flags = MI.getOperand(5).getTargetFlags();
4991 if (Flags == X86II::MO_GOTTPOFF || Flags == X86II::MO_INDNTPOFF ||
4992 Flags == X86II::MO_GOTNTPOFF)
4993 return false;
4994 }
4995
4996 switch (MI.getOpcode()) {
4997 default:
4998 return false;
4999
5000 // The shift instructions only modify ZF if their shift count is non-zero.
5001 // N.B.: The processor truncates the shift count depending on the encoding.
5002 CASE_ND(SAR8ri)
5003 CASE_ND(SAR16ri)
5004 CASE_ND(SAR32ri)
5005 CASE_ND(SAR64ri)
5006 CASE_ND(SHR8ri)
5007 CASE_ND(SHR16ri)
5008 CASE_ND(SHR32ri)
5009 CASE_ND(SHR64ri)
5010 return getTruncatedShiftCount(MI, 2) != 0;
5011
5012 // Some left shift instructions can be turned into LEA instructions but only
5013 // if their flags aren't used. Avoid transforming such instructions.
5014 CASE_ND(SHL8ri)
5015 CASE_ND(SHL16ri)
5016 CASE_ND(SHL32ri)
5017 CASE_ND(SHL64ri) {
5018 unsigned ShAmt = getTruncatedShiftCount(MI, 2);
5019 if (isTruncatedShiftCountForLEA(ShAmt))
5020 return false;
5021 return ShAmt != 0;
5022 }
5023
5024 CASE_ND(SHRD16rri8)
5025 CASE_ND(SHRD32rri8)
5026 CASE_ND(SHRD64rri8)
5027 CASE_ND(SHLD16rri8)
5028 CASE_ND(SHLD32rri8)
5029 CASE_ND(SHLD64rri8)
5030 return getTruncatedShiftCount(MI, 3) != 0;
5031
5032 CASE_ND(SUB64ri32)
5033 CASE_ND(SUB32ri)
5034 CASE_ND(SUB16ri)
5035 CASE_ND(SUB8ri)
5036 CASE_ND(SUB64rr)
5037 CASE_ND(SUB32rr)
5038 CASE_ND(SUB16rr)
5039 CASE_ND(SUB8rr)
5040 CASE_ND(SUB64rm)
5041 CASE_ND(SUB32rm)
5042 CASE_ND(SUB16rm)
5043 CASE_ND(SUB8rm)
5044 CASE_ND(DEC64r)
5045 CASE_ND(DEC32r)
5046 CASE_ND(DEC16r)
5047 CASE_ND(DEC8r)
5048 CASE_ND(ADD64ri32)
5049 CASE_ND(ADD32ri)
5050 CASE_ND(ADD16ri)
5051 CASE_ND(ADD8ri)
5052 CASE_ND(ADD64rr)
5053 CASE_ND(ADD32rr)
5054 CASE_ND(ADD16rr)
5055 CASE_ND(ADD8rr)
5056 CASE_ND(ADD64rm)
5057 CASE_ND(ADD32rm)
5058 CASE_ND(ADD16rm)
5059 CASE_ND(ADD8rm)
5060 CASE_ND(INC64r)
5061 CASE_ND(INC32r)
5062 CASE_ND(INC16r)
5063 CASE_ND(INC8r)
5064 CASE_ND(ADC64ri32)
5065 CASE_ND(ADC32ri)
5066 CASE_ND(ADC16ri)
5067 CASE_ND(ADC8ri)
5068 CASE_ND(ADC64rr)
5069 CASE_ND(ADC32rr)
5070 CASE_ND(ADC16rr)
5071 CASE_ND(ADC8rr)
5072 CASE_ND(ADC64rm)
5073 CASE_ND(ADC32rm)
5074 CASE_ND(ADC16rm)
5075 CASE_ND(ADC8rm)
5076 CASE_ND(SBB64ri32)
5077 CASE_ND(SBB32ri)
5078 CASE_ND(SBB16ri)
5079 CASE_ND(SBB8ri)
5080 CASE_ND(SBB64rr)
5081 CASE_ND(SBB32rr)
5082 CASE_ND(SBB16rr)
5083 CASE_ND(SBB8rr)
5084 CASE_ND(SBB64rm)
5085 CASE_ND(SBB32rm)
5086 CASE_ND(SBB16rm)
5087 CASE_ND(SBB8rm)
5088 CASE_ND(NEG8r)
5089 CASE_ND(NEG16r)
5090 CASE_ND(NEG32r)
5091 CASE_ND(NEG64r)
5092 case X86::LZCNT16rr:
5093 case X86::LZCNT16rm:
5094 case X86::LZCNT32rr:
5095 case X86::LZCNT32rm:
5096 case X86::LZCNT64rr:
5097 case X86::LZCNT64rm:
5098 case X86::POPCNT16rr:
5099 case X86::POPCNT16rm:
5100 case X86::POPCNT32rr:
5101 case X86::POPCNT32rm:
5102 case X86::POPCNT64rr:
5103 case X86::POPCNT64rm:
5104 case X86::TZCNT16rr:
5105 case X86::TZCNT16rm:
5106 case X86::TZCNT32rr:
5107 case X86::TZCNT32rm:
5108 case X86::TZCNT64rr:
5109 case X86::TZCNT64rm:
5110 return true;
5111 CASE_ND(AND64ri32)
5112 CASE_ND(AND32ri)
5113 CASE_ND(AND16ri)
5114 CASE_ND(AND8ri)
5115 CASE_ND(AND64rr)
5116 CASE_ND(AND32rr)
5117 CASE_ND(AND16rr)
5118 CASE_ND(AND8rr)
5119 CASE_ND(AND64rm)
5120 CASE_ND(AND32rm)
5121 CASE_ND(AND16rm)
5122 CASE_ND(AND8rm)
5123 CASE_ND(XOR64ri32)
5124 CASE_ND(XOR32ri)
5125 CASE_ND(XOR16ri)
5126 CASE_ND(XOR8ri)
5127 CASE_ND(XOR64rr)
5128 CASE_ND(XOR32rr)
5129 CASE_ND(XOR16rr)
5130 CASE_ND(XOR8rr)
5131 CASE_ND(XOR64rm)
5132 CASE_ND(XOR32rm)
5133 CASE_ND(XOR16rm)
5134 CASE_ND(XOR8rm)
5135 CASE_ND(OR64ri32)
5136 CASE_ND(OR32ri)
5137 CASE_ND(OR16ri)
5138 CASE_ND(OR8ri)
5139 CASE_ND(OR64rr)
5140 CASE_ND(OR32rr)
5141 CASE_ND(OR16rr)
5142 CASE_ND(OR8rr)
5143 CASE_ND(OR64rm)
5144 CASE_ND(OR32rm)
5145 CASE_ND(OR16rm)
5146 CASE_ND(OR8rm)
5147 case X86::ANDN32rr:
5148 case X86::ANDN32rm:
5149 case X86::ANDN64rr:
5150 case X86::ANDN64rm:
5151 case X86::BLSI32rr:
5152 case X86::BLSI32rm:
5153 case X86::BLSI64rr:
5154 case X86::BLSI64rm:
5155 case X86::BLSMSK32rr:
5156 case X86::BLSMSK32rm:
5157 case X86::BLSMSK64rr:
5158 case X86::BLSMSK64rm:
5159 case X86::BLSR32rr:
5160 case X86::BLSR32rm:
5161 case X86::BLSR64rr:
5162 case X86::BLSR64rm:
5163 case X86::BLCFILL32rr:
5164 case X86::BLCFILL32rm:
5165 case X86::BLCFILL64rr:
5166 case X86::BLCFILL64rm:
5167 case X86::BLCI32rr:
5168 case X86::BLCI32rm:
5169 case X86::BLCI64rr:
5170 case X86::BLCI64rm:
5171 case X86::BLCIC32rr:
5172 case X86::BLCIC32rm:
5173 case X86::BLCIC64rr:
5174 case X86::BLCIC64rm:
5175 case X86::BLCMSK32rr:
5176 case X86::BLCMSK32rm:
5177 case X86::BLCMSK64rr:
5178 case X86::BLCMSK64rm:
5179 case X86::BLCS32rr:
5180 case X86::BLCS32rm:
5181 case X86::BLCS64rr:
5182 case X86::BLCS64rm:
5183 case X86::BLSFILL32rr:
5184 case X86::BLSFILL32rm:
5185 case X86::BLSFILL64rr:
5186 case X86::BLSFILL64rm:
5187 case X86::BLSIC32rr:
5188 case X86::BLSIC32rm:
5189 case X86::BLSIC64rr:
5190 case X86::BLSIC64rm:
5191 case X86::BZHI32rr:
5192 case X86::BZHI32rm:
5193 case X86::BZHI64rr:
5194 case X86::BZHI64rm:
5195 case X86::T1MSKC32rr:
5196 case X86::T1MSKC32rm:
5197 case X86::T1MSKC64rr:
5198 case X86::T1MSKC64rm:
5199 case X86::TZMSK32rr:
5200 case X86::TZMSK32rm:
5201 case X86::TZMSK64rr:
5202 case X86::TZMSK64rm:
5203 // These instructions clear the overflow flag just like TEST.
5204 // FIXME: These are not the only instructions in this switch that clear the
5205 // overflow flag.
5206 ClearsOverflowFlag = true;
5207 return true;
5208 case X86::BEXTR32rr:
5209 case X86::BEXTR64rr:
5210 case X86::BEXTR32rm:
5211 case X86::BEXTR64rm:
5212 case X86::BEXTRI32ri:
5213 case X86::BEXTRI32mi:
5214 case X86::BEXTRI64ri:
5215 case X86::BEXTRI64mi:
5216 // BEXTR doesn't update the sign flag so we can't use it. It does clear
5217 // the overflow flag, but that's not useful without the sign flag.
5218 NoSignFlag = true;
5219 return true;
5220 }
5221}
5222
5223/// Check whether the use can be converted to remove a comparison against zero.
5225 switch (MI.getOpcode()) {
5226 default:
5227 return X86::COND_INVALID;
5228 CASE_ND(NEG8r)
5229 CASE_ND(NEG16r)
5230 CASE_ND(NEG32r)
5231 CASE_ND(NEG64r)
5232 return X86::COND_AE;
5233 case X86::LZCNT16rr:
5234 case X86::LZCNT32rr:
5235 case X86::LZCNT64rr:
5236 return X86::COND_B;
5237 case X86::POPCNT16rr:
5238 case X86::POPCNT32rr:
5239 case X86::POPCNT64rr:
5240 return X86::COND_E;
5241 case X86::TZCNT16rr:
5242 case X86::TZCNT32rr:
5243 case X86::TZCNT64rr:
5244 return X86::COND_B;
5245 case X86::BSF16rr:
5246 case X86::BSF32rr:
5247 case X86::BSF64rr:
5248 case X86::BSR16rr:
5249 case X86::BSR32rr:
5250 case X86::BSR64rr:
5251 return X86::COND_E;
5252 case X86::BLSI32rr:
5253 case X86::BLSI64rr:
5254 return X86::COND_AE;
5255 case X86::BLSR32rr:
5256 case X86::BLSR64rr:
5257 case X86::BLSMSK32rr:
5258 case X86::BLSMSK64rr:
5259 return X86::COND_B;
5260 // TODO: TBM instructions.
5261 }
5262}
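// For illustration: in a sequence such as
//   %x = ...
//   %c = POPCNT32rr %x
//   ... // EFLAGS not changed
//   TEST32rr %x, %x
// optimizeCompareInstr below can drop the TEST, because POPCNT sets ZF exactly
// when its source %x is zero; existing COND_E/COND_NE users keep their
// condition via the COND_E mapping above.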
5263
5264/// Check if there exists an earlier instruction that
5265/// operates on the same source operands and sets flags in the same way as
5266/// Compare; remove Compare if possible.
5268 Register SrcReg2, int64_t CmpMask,
5269 int64_t CmpValue,
5270 const MachineRegisterInfo *MRI) const {
5271 // Check whether we can replace SUB with CMP.
5272 switch (CmpInstr.getOpcode()) {
5273 default:
5274 break;
5275 CASE_ND(SUB64ri32)
5276 CASE_ND(SUB32ri)
5277 CASE_ND(SUB16ri)
5278 CASE_ND(SUB8ri)
5279 CASE_ND(SUB64rm)
5280 CASE_ND(SUB32rm)
5281 CASE_ND(SUB16rm)
5282 CASE_ND(SUB8rm)
5283 CASE_ND(SUB64rr)
5284 CASE_ND(SUB32rr)
5285 CASE_ND(SUB16rr)
5286 CASE_ND(SUB8rr) {
5287 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
5288 return false;
5289 // There is no use of the destination register, so we can replace SUB with CMP.
5290 unsigned NewOpcode = 0;
5291#define FROM_TO(A, B) \
5292 CASE_ND(A) NewOpcode = X86::B; \
5293 break;
5294 switch (CmpInstr.getOpcode()) {
5295 default:
5296 llvm_unreachable("Unreachable!");
5297 FROM_TO(SUB64rm, CMP64rm)
5298 FROM_TO(SUB32rm, CMP32rm)
5299 FROM_TO(SUB16rm, CMP16rm)
5300 FROM_TO(SUB8rm, CMP8rm)
5301 FROM_TO(SUB64rr, CMP64rr)
5302 FROM_TO(SUB32rr, CMP32rr)
5303 FROM_TO(SUB16rr, CMP16rr)
5304 FROM_TO(SUB8rr, CMP8rr)
5305 FROM_TO(SUB64ri32, CMP64ri32)
5306 FROM_TO(SUB32ri, CMP32ri)
5307 FROM_TO(SUB16ri, CMP16ri)
5308 FROM_TO(SUB8ri, CMP8ri)
5309 }
5310#undef FROM_TO
5311 CmpInstr.setDesc(get(NewOpcode));
5312 CmpInstr.removeOperand(0);
5313 // Mutating this instruction invalidates any debug data associated with it.
5314 CmpInstr.dropDebugNumber();
5315 // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
5316 if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
5317 NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
5318 return false;
5319 }
5320 }
5321
5322 // The following code tries to remove the comparison by re-using EFLAGS
5323 // from earlier instructions.
5324
5325 bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
5326
5327 // Transformation currently requires SSA values.
5328 if (SrcReg2.isPhysical())
5329 return false;
5330 MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
5331 assert(SrcRegDef && "Must have a definition (SSA)");
5332
5333 MachineInstr *MI = nullptr;
5334 MachineInstr *Sub = nullptr;
5335 MachineInstr *Movr0Inst = nullptr;
5336 bool NoSignFlag = false;
5337 bool ClearsOverflowFlag = false;
5338 bool ShouldUpdateCC = false;
5339 bool IsSwapped = false;
5341 int64_t ImmDelta = 0;
5342
5343 // Search backward from CmpInstr for the next instruction defining EFLAGS.
5345 MachineBasicBlock &CmpMBB = *CmpInstr.getParent();
5347 std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
5348 for (MachineBasicBlock *MBB = &CmpMBB;;) {
5349 for (MachineInstr &Inst : make_range(From, MBB->rend())) {
5350 // Try to use EFLAGS from the instruction defining %SrcReg. Example:
5351 // %eax = addl ...
5352 // ... // EFLAGS not changed
5353 // testl %eax, %eax // <-- can be removed
5354 if (&Inst == SrcRegDef) {
5355 if (IsCmpZero &&
5356 isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
5357 MI = &Inst;
5358 break;
5359 }
5360
5361 // Look back for the following pattern, in which case the
5362 // test16rr/test64rr instruction could be erased.
5363 //
5364 // Example for test16rr:
5365 // %reg = and32ri %in_reg, 5
5366 // ... // EFLAGS not changed.
5367 // %src_reg = copy %reg.sub_16bit:gr32
5368 // test16rr %src_reg, %src_reg, implicit-def $eflags
5369 // Example for test64rr:
5370 // %reg = and32ri %in_reg, 5
5371 // ... // EFLAGS not changed.
5372 // %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index
5373 // test64rr %src_reg, %src_reg, implicit-def $eflags
5374 MachineInstr *AndInstr = nullptr;
5375 if (IsCmpZero &&
5376 findRedundantFlagInstr(CmpInstr, Inst, MRI, &AndInstr, TRI,
5377 NoSignFlag, ClearsOverflowFlag)) {
5378 assert(AndInstr != nullptr && X86::isAND(AndInstr->getOpcode()));
5379 MI = AndInstr;
5380 break;
5381 }
5382 // Cannot find other candidates before definition of SrcReg.
5383 return false;
5384 }
5385
5386 if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
5387 // Try to use EFLAGS produced by an instruction reading %SrcReg.
5388 // Example:
5389 // %eax = ...
5390 // ...
5391 // popcntl %eax
5392 // ... // EFLAGS not changed
5393 // testl %eax, %eax // <-- can be removed
5394 if (IsCmpZero) {
5395 NewCC = isUseDefConvertible(Inst);
5396 if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
5397 Inst.getOperand(1).getReg() == SrcReg) {
5398 ShouldUpdateCC = true;
5399 MI = &Inst;
5400 break;
5401 }
5402 }
5403
5404 // Try to use EFLAGS from an instruction with similar flag results.
5405 // Example:
5406 // sub x, y or cmp x, y
5407 // ... // EFLAGS not changed
5408 // cmp x, y // <-- can be removed
5409 if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
5410 Inst, &IsSwapped, &ImmDelta)) {
5411 Sub = &Inst;
5412 break;
5413 }
5414
5415 // MOV32r0 is implemented with an xor, which clobbers the condition codes. It
5416 // is safe to move it up if its definition of EFLAGS is dead and earlier
5417 // instructions do not read or write EFLAGS.
5418 if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
5419 Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
5420 Movr0Inst = &Inst;
5421 continue;
5422 }
5423
5424 // Cannot do anything for any other EFLAG changes.
5425 return false;
5426 }
5427 }
5428
5429 if (MI || Sub)
5430 break;
5431
5432 // Reached the beginning of the basic block. Continue in the predecessor if
5433 // there is exactly one.
5434 if (MBB->pred_size() != 1)
5435 return false;
5436 MBB = *MBB->pred_begin();
5437 From = MBB->rbegin();
5438 }
5439
5440 // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
5441 // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
5442 // If we are done with the basic block, we need to check whether EFLAGS is
5443 // live-out.
5444 bool FlagsMayLiveOut = true;
5446 MachineBasicBlock::iterator AfterCmpInstr =
5447 std::next(MachineBasicBlock::iterator(CmpInstr));
5448 for (MachineInstr &Instr : make_range(AfterCmpInstr, CmpMBB.end())) {
5449 bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
5450 bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
5451 // We need to examine how EFLAGS is used if this instruction both uses and updates it.
5452 if (!UseEFLAGS && ModifyEFLAGS) {
5453 // It is safe to remove CmpInstr if EFLAGS is updated again.
5454 FlagsMayLiveOut = false;
5455 break;
5456 }
5457 if (!UseEFLAGS && !ModifyEFLAGS)
5458 continue;
5459
5460 // EFLAGS is used by this instruction.
5461 X86::CondCode OldCC = X86::getCondFromMI(Instr);
5462 if ((MI || IsSwapped || ImmDelta != 0) && OldCC == X86::COND_INVALID)
5463 return false;
5464
5465 X86::CondCode ReplacementCC = X86::COND_INVALID;
5466 if (MI) {
5467 switch (OldCC) {
5468 default:
5469 break;
5470 case X86::COND_A:
5471 case X86::COND_AE:
5472 case X86::COND_B:
5473 case X86::COND_BE:
5474 // CF is used, we can't perform this optimization.
5475 return false;
5476 case X86::COND_G:
5477 case X86::COND_GE:
5478 case X86::COND_L:
5479 case X86::COND_LE:
5480 // If SF is used, but the instruction doesn't update the SF, then we
5481 // can't do the optimization.
5482 if (NoSignFlag)
5483 return false;
5484 [[fallthrough]];
5485 case X86::COND_O:
5486 case X86::COND_NO:
5487 // If OF is used, the instruction needs to clear it like CmpZero does.
5488 if (!ClearsOverflowFlag)
5489 return false;
5490 break;
5491 case X86::COND_S:
5492 case X86::COND_NS:
5493 // If SF is used, but the instruction doesn't update the SF, then we
5494 // can't do the optimization.
5495 if (NoSignFlag)
5496 return false;
5497 break;
5498 }
5499
5500 // If we're updating the condition code, check whether we have to reverse the
5501 // condition.
5502 if (ShouldUpdateCC)
5503 switch (OldCC) {
5504 default:
5505 return false;
5506 case X86::COND_E:
5507 ReplacementCC = NewCC;
5508 break;
5509 case X86::COND_NE:
5510 ReplacementCC = GetOppositeBranchCondition(NewCC);
5511 break;
5512 }
5513 } else if (IsSwapped) {
5514 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
5515 // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
5516 // We swap the condition code and synthesize the new opcode.
5517 ReplacementCC = getSwappedCondition(OldCC);
5518 if (ReplacementCC == X86::COND_INVALID)
5519 return false;
5520 ShouldUpdateCC = true;
5521 } else if (ImmDelta != 0) {
5522 unsigned BitWidth = TRI->getRegSizeInBits(*MRI->getRegClass(SrcReg));
5523 // Shift amount for min/max constants to adjust for 8/16/32 instruction
5524 // sizes.
5525 switch (OldCC) {
5526 case X86::COND_L: // x <s (C + 1) --> x <=s C
5527 if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
5528 return false;
5529 ReplacementCC = X86::COND_LE;
5530 break;
5531 case X86::COND_B: // x <u (C + 1) --> x <=u C
5532 if (ImmDelta != 1 || CmpValue == 0)
5533 return false;
5534 ReplacementCC = X86::COND_BE;
5535 break;
5536 case X86::COND_GE: // x >=s (C + 1) --> x >s C
5537 if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
5538 return false;
5539 ReplacementCC = X86::COND_G;
5540 break;
5541 case X86::COND_AE: // x >=u (C + 1) --> x >u C
5542 if (ImmDelta != 1 || CmpValue == 0)
5543 return false;
5544 ReplacementCC = X86::COND_A;
5545 break;
5546 case X86::COND_G: // x >s (C - 1) --> x >=s C
5547 if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
5548 return false;
5549 ReplacementCC = X86::COND_GE;
5550 break;
5551 case X86::COND_A: // x >u (C - 1) --> x >=u C
5552 if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
5553 return false;
5554 ReplacementCC = X86::COND_AE;
5555 break;
5556 case X86::COND_LE: // x <=s (C - 1) --> x <s C
5557 if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
5558 return false;
5559 ReplacementCC = X86::COND_L;
5560 break;
5561 case X86::COND_BE: // x <=u (C - 1) --> x <u C
5562 if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
5563 return false;
5564 ReplacementCC = X86::COND_B;
5565 break;
5566 default:
5567 return false;
5568 }
5569 ShouldUpdateCC = true;
5570 }
5571
5572 if (ShouldUpdateCC && ReplacementCC != OldCC) {
5573 // Push the MachineInstr to OpsToUpdate.
5574 // If it is safe to remove CmpInstr, the condition code of these
5575 // instructions will be modified.
5576 OpsToUpdate.push_back(std::make_pair(&Instr, ReplacementCC));
5577 }
5578 if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
5579 // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
5580 FlagsMayLiveOut = false;
5581 break;
5582 }
5583 }
5584
5585 // If we have to update users but EFLAGS is live-out, abort, since we cannot
5586 // easily find all of the users.
5587 if ((MI != nullptr || ShouldUpdateCC) && FlagsMayLiveOut) {
5588 for (MachineBasicBlock *Successor : CmpMBB.successors())
5589 if (Successor->isLiveIn(X86::EFLAGS))
5590 return false;
5591 }
5592
5593 // The instruction to be updated is either Sub or MI.
5594 assert((MI == nullptr || Sub == nullptr) && "Should not have Sub and MI set");
5595 Sub = MI != nullptr ? MI : Sub;
5596 MachineBasicBlock *SubBB = Sub->getParent();
5597 // Move Movr0Inst to the appropriate place before Sub.
5598 if (Movr0Inst) {
5599 // Only move within the same block so we don't accidentally move to a
5600 // block with higher execution frequency.
5601 if (&CmpMBB != SubBB)
5602 return false;
5603 // Look backwards until we find a def that doesn't use the current EFLAGS.
5604 MachineBasicBlock::reverse_iterator InsertI = Sub->getParent()->rbegin(),
5605 InsertE = Sub->getParent()->rend();
5606 for (; InsertI != InsertE; ++InsertI) {
5607 MachineInstr *Instr = &*InsertI;
5608 if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
5609 Instr->modifiesRegister(X86::EFLAGS, TRI)) {
5610 Movr0Inst->getParent()->remove(Movr0Inst);
5611 Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
5612 Movr0Inst);
5613 break;
5614 }
5615 }
5616 if (InsertI == InsertE)
5617 return false;
5618 }
5619
5620 // Make sure Sub instruction defines EFLAGS and mark the def live.
5621 MachineOperand *FlagDef =
5622 Sub->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
5623 assert(FlagDef && "Unable to locate a def EFLAGS operand");
5624 FlagDef->setIsDead(false);
5625
5626 CmpInstr.eraseFromParent();
5627
5628 // Modify the condition code of instructions in OpsToUpdate.
5629 for (auto &Op : OpsToUpdate) {
5630 Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
5631 .setImm(Op.second);
5632 }
5633 // Add EFLAGS to the live-ins of each block between CmpMBB and the block of the flags producer.
5634 for (MachineBasicBlock *MBB = &CmpMBB; MBB != SubBB;
5635 MBB = *MBB->pred_begin()) {
5636 assert(MBB->pred_size() == 1 && "Expected exactly one predecessor");
5637 if (!MBB->isLiveIn(X86::EFLAGS))
5638 MBB->addLiveIn(X86::EFLAGS);
5639 }
5640 return true;
5641}
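// Illustrative sketch (not part of the upstream source; MIR syntax and
// condition-code immediates are approximate): the IsSwapped path above lets a
// compare whose operands are reversed relative to an earlier flag-producing
// SUB be deleted, with the flag user's condition code swapped instead:
//
//   %d = SUB32rr %r1, %r2, implicit-def $eflags
//   CMP32rr %r2, %r1, implicit-def $eflags      ; redundant, operands swapped
//   %v = CMOV32rr %a, %b, 12, implicit $eflags  ; 12 == COND_L
//
// can become, assuming nothing in between clobbers EFLAGS:
//
//   %d = SUB32rr %r1, %r2, implicit-def $eflags
//   %v = CMOV32rr %a, %b, 15, implicit $eflags  ; 15 == COND_G (swapped)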
5642
5643/// Try to remove the load by folding it to a register
5644/// operand at the use. We fold the load if it defines a virtual
5645/// register, the virtual register is used once in the same BB, and the
5646/// instructions in-between do not load or store and have no side effects.
5647MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
5648 const MachineRegisterInfo *MRI,
5649 Register &FoldAsLoadDefReg,
5650 MachineInstr *&DefMI) const {
5651 // Check whether we can move DefMI here.
5652 DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
5653 assert(DefMI);
5654 bool SawStore = false;
5655 if (!DefMI->isSafeToMove(SawStore))
5656 return nullptr;
5657
5658 // Collect information about virtual register operands of MI.
5659 SmallVector<unsigned, 1> SrcOperandIds;
5660 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
5661 MachineOperand &MO = MI.getOperand(i);
5662 if (!MO.isReg())
5663 continue;
5664 Register Reg = MO.getReg();
5665 if (Reg != FoldAsLoadDefReg)
5666 continue;
5667 // Do not fold if we have a subreg use or a def.
5668 if (MO.getSubReg() || MO.isDef())
5669 return nullptr;
5670 SrcOperandIds.push_back(i);
5671 }
5672 if (SrcOperandIds.empty())
5673 return nullptr;
5674
5675 // Check whether we can fold the def into SrcOperandId.
5676 if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) {
5677 FoldAsLoadDefReg = 0;
5678 return FoldMI;
5679 }
5680
5681 return nullptr;
5682}
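// Illustrative sketch (not part of the upstream source; MIR syntax is
// approximate): optimizeLoadInstr folds a single-use load of FoldAsLoadDefReg
// into its user via foldMemoryOperand, e.g.
//
//   %1:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg
//   %2:gr32 = ADD32rr %0, killed %1, implicit-def dead $eflags
//
// becomes
//
//   %2:gr32 = ADD32rm %0, %stack.0, 1, $noreg, 0, $noreg,
//             implicit-def dead $eflags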
5683
5684/// \returns true if the instruction can be changed to COPY when imm is 0.
5685static bool canConvert2Copy(unsigned Opc) {
5686 switch (Opc) {
5687 default:
5688 return false;
5689 CASE_ND(ADD64ri32)
5690 CASE_ND(SUB64ri32)
5691 CASE_ND(OR64ri32)
5692 CASE_ND(XOR64ri32)
5693 CASE_ND(ADD32ri)
5694 CASE_ND(SUB32ri)
5695 CASE_ND(OR32ri)
5696 CASE_ND(XOR32ri)
5697 return true;
5698 }
5699}
5700
5701/// Convert an ALUrr opcode to the corresponding ALUri opcode, e.g.
5702/// ADD32rr ==> ADD32ri
5703static unsigned convertALUrr2ALUri(unsigned Opc) {
5704 switch (Opc) {
5705 default:
5706 return 0;
5707#define FROM_TO(FROM, TO) \
5708 case X86::FROM: \
5709 return X86::TO; \
5710 case X86::FROM##_ND: \
5711 return X86::TO##_ND;
5712 FROM_TO(ADD64rr, ADD64ri32)
5713 FROM_TO(ADC64rr, ADC64ri32)
5714 FROM_TO(SUB64rr, SUB64ri32)
5715 FROM_TO(SBB64rr, SBB64ri32)
5716 FROM_TO(AND64rr, AND64ri32)
5717 FROM_TO(OR64rr, OR64ri32)
5718 FROM_TO(XOR64rr, XOR64ri32)
5719 FROM_TO(SHR64rCL, SHR64ri)
5720 FROM_TO(SHL64rCL, SHL64ri)
5721 FROM_TO(SAR64rCL, SAR64ri)
5722 FROM_TO(ROL64rCL, ROL64ri)
5723 FROM_TO(ROR64rCL, ROR64ri)
5724 FROM_TO(RCL64rCL, RCL64ri)
5725 FROM_TO(RCR64rCL, RCR64ri)
5726 FROM_TO(ADD32rr, ADD32ri)
5727 FROM_TO(ADC32rr, ADC32ri)
5728 FROM_TO(SUB32rr, SUB32ri)
5729 FROM_TO(SBB32rr, SBB32ri)
5730 FROM_TO(AND32rr, AND32ri)
5731 FROM_TO(OR32rr, OR32ri)
5732 FROM_TO(XOR32rr, XOR32ri)
5733 FROM_TO(SHR32rCL, SHR32ri)
5734 FROM_TO(SHL32rCL, SHL32ri)
5735 FROM_TO(SAR32rCL, SAR32ri)
5736 FROM_TO(ROL32rCL, ROL32ri)
5737 FROM_TO(ROR32rCL, ROR32ri)
5738 FROM_TO(RCL32rCL, RCL32ri)
5739 FROM_TO(RCR32rCL, RCR32ri)
5740#undef FROM_TO
5741#define FROM_TO(FROM, TO) \
5742 case X86::FROM: \
5743 return X86::TO;
5744 FROM_TO(TEST64rr, TEST64ri32)
5745 FROM_TO(CTEST64rr, CTEST64ri32)
5746 FROM_TO(CMP64rr, CMP64ri32)
5747 FROM_TO(CCMP64rr, CCMP64ri32)
5748 FROM_TO(TEST32rr, TEST32ri)
5749 FROM_TO(CTEST32rr, CTEST32ri)
5750 FROM_TO(CMP32rr, CMP32ri)
5751 FROM_TO(CCMP32rr, CCMP32ri)
5752#undef FROM_TO
5753 }
5754}
5755
5756/// Reg is assigned ImmVal in DefMI, and is used in UseMI.
5757/// If MakeChange is true, this function tries to replace Reg by ImmVal in
5758/// UseMI. If MakeChange is false, just check if folding is possible.
5759//
5760/// \returns true if folding is successful or possible.
5761bool X86InstrInfo::foldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI,
5762 Register Reg, int64_t ImmVal,
5763 MachineRegisterInfo *MRI,
5764 bool MakeChange) const {
5765 bool Modified = false;
5766
5767 // 64-bit operations accept sign-extended 32-bit immediates.
5768 // 32-bit operations accept all 32-bit immediates, so we don't need to check
5769 // them.
5770 const TargetRegisterClass *RC = nullptr;
5771 if (Reg.isVirtual())
5772 RC = MRI->getRegClass(Reg);
5773 if ((Reg.isPhysical() && X86::GR64RegClass.contains(Reg)) ||
5774 (Reg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC))) {
5775 if (!isInt<32>(ImmVal))
5776 return false;
5777 }
5778
5779 if (UseMI.findRegisterUseOperand(Reg, /*TRI=*/nullptr)->getSubReg())
5780 return false;
5781 // An immediate has a larger code size than a register, so avoid folding the
5782 // immediate if it has more than one use and we are optimizing for size.
5783 if (UseMI.getMF()->getFunction().hasOptSize() && Reg.isVirtual() &&
5784 !MRI->hasOneNonDBGUse(Reg))
5785 return false;
5786
5787 unsigned Opc = UseMI.getOpcode();
5788 unsigned NewOpc;
5789 if (Opc == TargetOpcode::COPY) {
5790 Register ToReg = UseMI.getOperand(0).getReg();
5791 const TargetRegisterClass *RC = nullptr;
5792 if (ToReg.isVirtual())
5793 RC = MRI->getRegClass(ToReg);
5794 bool GR32Reg = (ToReg.isVirtual() && X86::GR32RegClass.hasSubClassEq(RC)) ||
5795 (ToReg.isPhysical() && X86::GR32RegClass.contains(ToReg));
5796 bool GR64Reg = (ToReg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC)) ||
5797 (ToReg.isPhysical() && X86::GR64RegClass.contains(ToReg));
5798 bool GR8Reg = (ToReg.isVirtual() && X86::GR8RegClass.hasSubClassEq(RC)) ||
5799 (ToReg.isPhysical() && X86::GR8RegClass.contains(ToReg));
5800
5801 if (ImmVal == 0) {
5802 // We have MOV32r0 only.
5803 if (!GR32Reg)
5804 return false;
5805 }
5806
5807 if (GR64Reg) {
5808 if (isUInt<32>(ImmVal))
5809 NewOpc = X86::MOV32ri64;
5810 else
5811 NewOpc = X86::MOV64ri;
5812 } else if (GR32Reg) {
5813 NewOpc = X86::MOV32ri;
5814 if (ImmVal == 0) {
5815 // MOV32r0 clobbers EFLAGS.
5816 const TargetRegisterInfo *TRI = &getRegisterInfo();
5817 if (UseMI.getParent()->computeRegisterLiveness(
5818 TRI, X86::EFLAGS, UseMI) != MachineBasicBlock::LQR_Dead)
5819 return false;
5820
5821 // MOV32r0 is different from the other cases because it doesn't encode
5822 // the immediate in the instruction, so we directly modify it here.
5823 if (!MakeChange)
5824 return true;
5825 UseMI.setDesc(get(X86::MOV32r0));
5826 UseMI.removeOperand(
5827 UseMI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr));
5828 UseMI.addOperand(MachineOperand::CreateReg(X86::EFLAGS, /*isDef=*/true,
5829 /*isImp=*/true,
5830 /*isKill=*/false,
5831 /*isDead=*/true));
5832 Modified = true;
5833 }
5834 } else if (GR8Reg)
5835 NewOpc = X86::MOV8ri;
5836 else
5837 return false;
5838 } else
5839 NewOpc = convertALUrr2ALUri(Opc);
5840
5841 if (!NewOpc)
5842 return false;
5843
5844 // For SUB instructions the immediate can only be the second source operand.
5845 if ((NewOpc == X86::SUB64ri32 || NewOpc == X86::SUB32ri ||
5846 NewOpc == X86::SBB64ri32 || NewOpc == X86::SBB32ri ||
5847 NewOpc == X86::SUB64ri32_ND || NewOpc == X86::SUB32ri_ND ||
5848 NewOpc == X86::SBB64ri32_ND || NewOpc == X86::SBB32ri_ND) &&
5849 UseMI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != 2)
5850 return false;
5851 // For CMP instructions the immediate can only be at index 1.
5852 if (((NewOpc == X86::CMP64ri32 || NewOpc == X86::CMP32ri) ||
5853 (NewOpc == X86::CCMP64ri32 || NewOpc == X86::CCMP32ri)) &&
5854 UseMI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != 1)
5855 return false;
5856
5857 using namespace X86;
5858 if (isSHL(Opc) || isSHR(Opc) || isSAR(Opc) || isROL(Opc) || isROR(Opc) ||
5859 isRCL(Opc) || isRCR(Opc)) {
5860 unsigned RegIdx = UseMI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr);
5861 if (RegIdx < 2)
5862 return false;
5863 if (!isInt<8>(ImmVal))
5864 return false;
5865 assert(Reg == X86::CL);
5866
5867 if (!MakeChange)
5868 return true;
5869 UseMI.setDesc(get(NewOpc));
5870 UseMI.removeOperand(RegIdx);
5871 UseMI.addOperand(MachineOperand::CreateImm(ImmVal));
5872 // Reg is the physical register $cl, so we can't tell through MRI whether
5873 // DefMI is dead. Let the caller handle it, or the dead-mi-elimination pass
5874 // can delete the instruction that defines the dead physical register.
5875 return true;
5876 }
5877
5878 if (!MakeChange)
5879 return true;
5880
5881 if (!Modified) {
5882 // Modify the instruction.
5883 if (ImmVal == 0 && canConvert2Copy(NewOpc) &&
5884 UseMI.registerDefIsDead(X86::EFLAGS, /*TRI=*/nullptr)) {
5885 // %100 = add %101, 0
5886 // ==>
5887 // %100 = COPY %101
5888 UseMI.setDesc(get(TargetOpcode::COPY));
5889 UseMI.removeOperand(
5890 UseMI.findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr));
5891 UseMI.removeOperand(
5892 UseMI.findRegisterDefOperandIdx(X86::EFLAGS, /*TRI=*/nullptr));
5893 UseMI.untieRegOperand(0);
5896 } else {
5897 unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex;
5898 unsigned ImmOpNum = 2;
5899 if (!UseMI.getOperand(0).isDef()) {
5900 Op1 = 0; // TEST, CMP, CTEST, CCMP
5901 ImmOpNum = 1;
5902 }
5903 if (Opc == TargetOpcode::COPY)
5904 ImmOpNum = 1;
5905 if (findCommutedOpIndices(UseMI, Op1, Op2) &&
5906 UseMI.getOperand(Op1).getReg() == Reg)
5907 commuteInstruction(UseMI);
5908
5909 assert(UseMI.getOperand(ImmOpNum).getReg() == Reg);
5910 UseMI.setDesc(get(NewOpc));
5911 UseMI.getOperand(ImmOpNum).ChangeToImmediate(ImmVal);
5912 }
5913 }
5914
5915 if (Reg.isVirtual() && MRI->use_nodbg_empty(Reg))
5916 DefMI->eraseFromParent();
5917
5918 return true;
5919}
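// Illustrative sketch (not part of the upstream source; MIR syntax is
// approximate): foldImmediate rewrites a use of a register known to hold a
// constant into an immediate form of the same operation, e.g.
//
//   %1:gr32 = MOV32ri 42
//   %2:gr32 = ADD32rr %0, %1, implicit-def dead $eflags
//
// becomes
//
//   %2:gr32 = ADD32ri %0, 42, implicit-def dead $eflags
//
// and an add/sub/or/xor of 0 whose EFLAGS def is dead degenerates to a plain
// COPY, as handled by canConvert2Copy above.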
5920
5921/// foldImmediate - 'Reg' is known to be defined by a move-immediate
5922/// instruction; try to fold the immediate into the use instruction.
5923bool X86InstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
5924 Register Reg, MachineRegisterInfo *MRI) const {
5925 int64_t ImmVal;
5926 if (!getConstValDefinedInReg(DefMI, Reg, ImmVal))
5927 return false;
5928
5929 return foldImmediateImpl(UseMI, &DefMI, Reg, ImmVal, MRI, true);
5930}
5931
5932/// Expand a single-def pseudo instruction to a two-addr
5933/// instruction with two undef reads of the register being defined.
5934/// This is used for mapping:
5935/// %xmm4 = V_SET0
5936/// to:
5937/// %xmm4 = PXORrr undef %xmm4, undef %xmm4
5938///
5939static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
5940 const MCInstrDesc &Desc) {
5941 assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
5942 Register Reg = MIB.getReg(0);
5943 MIB->setDesc(Desc);
5944
5945 // MachineInstr::addOperand() will insert explicit operands before any
5946 // implicit operands.
5947 MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
5948 // But we don't trust that.
5949 assert(MIB.getReg(1) == Reg && MIB.getReg(2) == Reg && "Misplaced operand");
5950 return true;
5951}
5952
5953/// Expand a single-def pseudo instruction to a two-addr
5954/// instruction with two %k0 reads.
5955/// This is used for mapping:
5956/// %k4 = K_SET1
5957/// to:
5958/// %k4 = KXNORrr %k0, %k0
5959static bool Expand2AddrKreg(MachineInstrBuilder &MIB, const MCInstrDesc &Desc,
5960 Register Reg) {
5961 assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
5962 MIB->setDesc(Desc);
5963 MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
5964 return true;
5965}
5966
5967static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
5968 bool MinusOne) {
5969 MachineBasicBlock &MBB = *MIB->getParent();
5970 const DebugLoc &DL = MIB->getDebugLoc();
5971 Register Reg = MIB.getReg(0);
5972
5973 // Insert the XOR.
5974 BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
5975 .addReg(Reg, RegState::Undef)
5976 .addReg(Reg, RegState::Undef);
5977
5978 // Turn the pseudo into an INC or DEC.
5979 MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
5980 MIB.addReg(Reg);
5981
5982 return true;
5983}
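// Illustrative sketch (not part of the upstream source): MOV32r1 and MOV32r_1
// expand to a dependency-breaking xor followed by an increment or decrement,
// e.g. for MOV32r1 targeting EAX:
//
//   xorl %eax, %eax
//   incl %eax            ; decl %eax instead for MOV32r_1 (-1)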
5984
5985static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
5986 const TargetInstrInfo &TII,
5987 const X86Subtarget &Subtarget) {
5988 MachineBasicBlock &MBB = *MIB->getParent();
5989 const DebugLoc &DL = MIB->getDebugLoc();
5990 int64_t Imm = MIB->getOperand(1).getImm();
5991 assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
5992 MachineBasicBlock::iterator I = MIB.getInstr();
5993
5994 int StackAdjustment;
5995
5996 if (Subtarget.is64Bit()) {
5997 assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
5998 MIB->getOpcode() == X86::MOV32ImmSExti8);
5999
6000 // Can't use push/pop lowering if the function might write to the red zone.
6001 X86MachineFunctionInfo *X86FI =
6002 MBB.getParent()->getInfo<X86MachineFunctionInfo>();
6003 if (X86FI->getUsesRedZone()) {
6004 MIB->setDesc(TII.get(MIB->getOpcode() == X86::MOV32ImmSExti8
6005 ? X86::MOV32ri
6006 : X86::MOV64ri));
6007 return true;
6008 }
6009
6010 // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
6011 // widen the register if necessary.
6012 StackAdjustment = 8;
6013 BuildMI(MBB, I, DL, TII.get(X86::PUSH64i32)).addImm(Imm);
6014 MIB->setDesc(TII.get(X86::POP64r));
6015 MIB->getOperand(0).setReg(getX86SubSuperRegister(MIB.getReg(0), 64));
6016 } else {
6017 assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
6018 StackAdjustment = 4;
6019 BuildMI(MBB, I, DL, TII.get(X86::PUSH32i)).addImm(Imm);
6020 MIB->setDesc(TII.get(X86::POP32r));
6021 }
6022 MIB->removeOperand(1);
6024
6025 // Build CFI if necessary.
6026 MachineFunction &MF = *MBB.getParent();
6027 const X86FrameLowering *TFL = Subtarget.getFrameLowering();
6028 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
6029 bool NeedsDwarfCFI = !IsWin64Prologue && MF.needsFrameMoves();
6030 bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
6031 if (EmitCFI) {
6032 TFL->BuildCFI(
6033 MBB, I, DL,
6034 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
6035 TFL->BuildCFI(
6036 MBB, std::next(I), DL,
6037 MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
6038 }
6039
6040 return true;
6041}
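// Illustrative sketch (not part of the upstream source): when the red zone is
// unused, MOV64ImmSExti8 is lowered through the stack so the sign-extended
// 8-bit immediate gets a compact encoding, e.g.
//
//   pushq $-1
//   popq  %rax           ; instead of movq $-1, %rax
//
// bracketed by .cfi_adjust_cfa_offset 8 / -8 when DWARF CFI is required.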
6042
6043// LoadStackGuard has so far only been implemented for 64-bit MachO. A
6044// different code sequence is needed for other targets.
6045static void expandLoadStackGuard(MachineInstrBuilder &MIB,
6046 const TargetInstrInfo &TII) {
6047 MachineBasicBlock &MBB = *MIB->getParent();
6048 const DebugLoc &DL = MIB->getDebugLoc();
6049 Register Reg = MIB.getReg(0);
6050 const GlobalValue *GV =
6051 cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
6052 auto Flags = MachineMemOperand::MOLoad |
6053 MachineMemOperand::MODereferenceable |
6054 MachineMemOperand::MOInvariant;
6055 MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
6056 MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, Align(8));
6057 MachineBasicBlock::iterator I = MIB.getInstr();
6058
6059 BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg)
6060 .addReg(X86::RIP)
6061 .addImm(1)
6062 .addReg(0)
6063 .addGlobalAddress(GV, 0, X86II::MO_GOTPCREL)
6064 .addReg(0)
6065 .addMemOperand(MMO);
6066 MIB->setDebugLoc(DL);
6067 MIB->setDesc(TII.get(X86::MOV64rm));
6068 MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
6069}
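// Illustrative sketch (not part of the upstream source; the exact symbol name
// is target-dependent): on 64-bit MachO, LOAD_STACK_GUARD becomes a
// GOT-relative load of the guard's address followed by a load of the guard
// value itself, roughly:
//
//   movq ___stack_chk_guard@GOTPCREL(%rip), %rax
//   movq (%rax), %rax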
6070
6070
6071static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) {
6072 MachineBasicBlock &MBB = *MIB->getParent();
6073 MachineFunction &MF = *MBB.getParent();
6074 const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
6075 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
6076 unsigned XorOp =
6077 MIB->getOpcode() == X86::XOR64_FP ? X86::XOR64rr : X86::XOR32rr;
6078 MIB->setDesc(TII.get(XorOp));
6079 MIB.addReg(TRI->getFrameRegister(MF), RegState::Undef);
6080 return true;
6081}
6082
6083// This is used to handle spills for 128/256-bit registers when we have AVX512,
6084// but not VLX. If it uses an extended register, we need to use an instruction
6085// that loads the lower 128/256 bits but is available with only AVX512F.
6086static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
6087 const TargetRegisterInfo *TRI,
6088 const MCInstrDesc &LoadDesc,
6089 const MCInstrDesc &BroadcastDesc, unsigned SubIdx) {
6090 Register DestReg = MIB.getReg(0);
6091 // Check if DestReg is XMM16-31 or YMM16-31.
6092 if (TRI->getEncodingValue(DestReg) < 16) {
6093 // We can use a normal VEX encoded load.
6094 MIB->setDesc(LoadDesc);
6095 } else {
6096 // Use a 128/256-bit VBROADCAST instruction.
6097 MIB->setDesc(BroadcastDesc);
6098 // Change the destination to a 512-bit register.
6099 DestReg = TRI->getMatchingSuperReg(DestReg, SubIdx, &X86::VR512RegClass);
6100 MIB->getOperand(0).setReg(DestReg);
6101 }
6102 return true;
6103}
6104
6105// This is used to handle spills for 128/256-bit registers when we have AVX512,
6106// but not VLX. If it uses an extended register, we need to use an instruction
6107// that stores the lower 128/256 bits but is available with only AVX512F.
6108static bool expandNOVLXStore(MachineInstrBuilder &MIB,
6109 const TargetRegisterInfo *TRI,
6110 const MCInstrDesc &StoreDesc,
6111 const MCInstrDesc &ExtractDesc, unsigned SubIdx) {
6112 Register SrcReg = MIB.getReg(X86::AddrNumOperands);
6113 // Check if SrcReg is XMM16-31 or YMM16-31.
6114 if (TRI->getEncodingValue(SrcReg) < 16) {
6115 // We can use a normal VEX encoded store.
6116 MIB->setDesc(StoreDesc);
6117 } else {
6118 // Use a VEXTRACTF instruction.
6119 MIB->setDesc(ExtractDesc);
6120 // Change the source to a 512-bit register.
6121 SrcReg = TRI->getMatchingSuperReg(SrcReg, SubIdx, &X86::VR512RegClass);
6122 MIB->getOperand(X86::AddrNumOperands).setReg(SrcReg);
6123 MIB.addImm(0x0); // Append immediate to extract from the lower bits.
6124 }
6125
6126 return true;
6127}
6128
6129static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) {
6130 MIB->setDesc(Desc);
6131 int64_t ShiftAmt = MIB->getOperand(2).getImm();
6132 // Temporarily remove the immediate so we can add another source register.
6133 MIB->removeOperand(2);
6134 // Add the register. Don't copy the kill flag if there is one.
6135 MIB.addReg(MIB.getReg(1), getUndefRegState(MIB->getOperand(1).isUndef()));
6136 // Add back the immediate.
6137 MIB.addImm(ShiftAmt);
6138 return true;
6139}
6140
6141bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
6142 bool HasAVX = Subtarget.hasAVX();
6143 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
6144 switch (MI.getOpcode()) {
6145 case X86::MOV32r0:
6146 return Expand2AddrUndef(MIB, get(X86::XOR32rr));
6147 case X86::MOV32r1:
6148 return expandMOV32r1(MIB, *this, /*MinusOne=*/false);
6149 case X86::MOV32r_1:
6150 return expandMOV32r1(MIB, *this, /*MinusOne=*/true);
6151 case X86::MOV32ImmSExti8:
6152 case X86::MOV64ImmSExti8:
6153 return ExpandMOVImmSExti8(MIB, *this, Subtarget);
6154 case X86::SETB_C32r:
6155 return Expand2AddrUndef(MIB, get(X86::SBB32rr));
6156 case X86::SETB_C64r:
6157 return Expand2AddrUndef(MIB, get(X86::SBB64rr));
6158 case X86::MMX_SET0:
6159 return Expand2AddrUndef(MIB, get(X86::MMX_PXORrr));
6160 case X86::V_SET0:
6161 case X86::FsFLD0SS:
6162 case X86::FsFLD0SD:
6163 case X86::FsFLD0SH:
6164 case X86::FsFLD0F128:
6165 return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
6166 case X86::AVX_SET0: {
6167 assert(HasAVX && "AVX not supported");
6168 const TargetRegisterInfo *TRI = &getRegisterInfo();
6169 Register SrcReg = MIB.getReg(0);
6170 Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
6171 MIB->getOperand(0).setReg(XReg);
6172 Expand2AddrUndef(MIB, get(X86::VXORPSrr));
6173 MIB.addReg(SrcReg, RegState::ImplicitDefine);
6174 return true;
6175 }
6176 case X86::AVX512_128_SET0:
6177 case X86::AVX512_FsFLD0SH:
6178 case X86::AVX512_FsFLD0SS:
6179 case X86::AVX512_FsFLD0SD:
6180 case X86::AVX512_FsFLD0F128: {
6181 bool HasVLX = Subtarget.hasVLX();
6182 Register SrcReg = MIB.getReg(0);
6183 const TargetRegisterInfo *TRI = &getRegisterInfo();
6184 if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
6185 return Expand2AddrUndef(MIB,
6186 get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
6187 // Extended register without VLX. Use a larger XOR.
6188 SrcReg =
6189 TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
6190 MIB->getOperand(0).setReg(SrcReg);
6191 return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
6192 }
6193 case X86::AVX512_256_SET0:
6194 case X86::AVX512_512_SET0: {
6195 bool HasVLX = Subtarget.hasVLX();
6196 Register SrcReg = MIB.getReg(0);
6197 const TargetRegisterInfo *TRI = &getRegisterInfo();
6198 if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
6199 Register XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
6200 MIB->getOperand(0).setReg(XReg);
6201 Expand2AddrUndef(MIB, get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
6202 MIB.addReg(SrcReg, RegState::ImplicitDefine);
6203 return true;
6204 }
6205 if (MI.getOpcode() == X86::AVX512_256_SET0) {
6206 // No VLX so we must reference a zmm.
6207 unsigned ZReg =
6208 TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
6209 MIB->getOperand(0).setReg(ZReg);
6210 }
6211 return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
6212 }
6213 case X86::V_SETALLONES:
6214 return Expand2AddrUndef(MIB,
6215 get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
6216 case X86::AVX2_SETALLONES:
6217 return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
6218 case X86::AVX1_SETALLONES: {
6219 Register Reg = MIB.getReg(0);
6220 // VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS.
6221 MIB->setDesc(get(X86::VCMPPSYrri));
6222 MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
6223 return true;
6224 }
6225 case X86::AVX512_512_SETALLONES: {
6226 Register Reg = MIB.getReg(0);
6227 MIB->setDesc(get(X86::VPTERNLOGDZrri));
6228 // VPTERNLOGD needs 3 register inputs and an immediate.
6229 // 0xff will return 1s for any input.
6230 MIB.addReg(Reg, RegState::Undef)
6231 .addReg(Reg, RegState::Undef)
6232 .addReg(Reg, RegState::Undef)
6233 .addImm(0xff);
6234 return true;
6235 }
6236 case X86::AVX512_512_SEXT_MASK_32:
6237 case X86::AVX512_512_SEXT_MASK_64: {
6238 Register Reg = MIB.getReg(0);
6239 Register MaskReg = MIB.getReg(1);
6240 unsigned MaskState = getRegState(MIB->getOperand(1));
6241 unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64)
6242 ? X86::VPTERNLOGQZrrikz
6243 : X86::VPTERNLOGDZrrikz;
6244 MI.removeOperand(1);
6245 MIB->setDesc(get(Opc));
6246 // VPTERNLOG needs 3 register inputs and an immediate.
6247 // 0xff will return 1s for any input.
6248 MIB.addReg(Reg, RegState::Undef)
6249 .addReg(MaskReg, MaskState)
6250 .addReg(Reg, RegState::Undef)
6251 .addReg(Reg, RegState::Undef)
6252 .addImm(0xff);
6253 return true;
6254 }
6255 case X86::VMOVAPSZ128rm_NOVLX:
6256 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
6257 get(X86::VBROADCASTF32X4Zrm), X86::sub_xmm);
6258 case X86::VMOVUPSZ128rm_NOVLX:
6259 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm),
6260 get(X86::VBROADCASTF32X4Zrm), X86::sub_xmm);
6261 case X86::VMOVAPSZ256rm_NOVLX:
6262 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm),
6263 get(X86::VBROADCASTF64X4Zrm), X86::sub_ymm);
6264 case X86::VMOVUPSZ256rm_NOVLX:
6265 return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm),
6266 get(X86::VBROADCASTF64X4Zrm), X86::sub_ymm);
6267 case X86::VMOVAPSZ128mr_NOVLX:
6268 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr),
6269 get(X86::VEXTRACTF32X4Zmri), X86::sub_xmm);
6270 case X86::VMOVUPSZ128mr_NOVLX:
6271 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSmr),
6272 get(X86::VEXTRACTF32X4Zmri), X86::sub_xmm);
6273 case X86::VMOVAPSZ256mr_NOVLX:
6274 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSYmr),
6275 get(X86::VEXTRACTF64X4Zmri), X86::sub_ymm);
6276 case X86::VMOVUPSZ256mr_NOVLX:
6277 return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
6278 get(X86::VEXTRACTF64X4Zmri), X86::sub_ymm);
6279 case X86::MOV32ri64: {
6280 Register Reg = MIB.getReg(0);
6281 Register Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
6282 MI.setDesc(get(X86::MOV32ri));
6283 MIB->getOperand(0).setReg(Reg32);
6284 MIB.addReg(Reg, RegState::ImplicitDefine);
6285 return true;
6286 }
6287
6288 case X86::RDFLAGS32:
6289 case X86::RDFLAGS64: {
6290 unsigned Is64Bit = MI.getOpcode() == X86::RDFLAGS64;
6291 MachineBasicBlock &MBB = *MIB->getParent();
6292
6293 MachineInstr *NewMI = BuildMI(MBB, MI, MIB->getDebugLoc(),
6294 get(Is64Bit ? X86::PUSHF64 : X86::PUSHF32))
6295 .getInstr();
6296
6297 // Permit reads of the EFLAGS and DF registers without them being defined.
6298 // This intrinsic exists to read external processor state in flags, such as
6299 // the trap flag, interrupt flag, and direction flag, none of which are
6300 // modeled by the backend.
6301 assert(NewMI->getOperand(2).getReg() == X86::EFLAGS &&
6302 "Unexpected register in operand! Should be EFLAGS.");
6303 NewMI->getOperand(2).setIsUndef();
6304 assert(NewMI->getOperand(3).getReg() == X86::DF &&
6305 "Unexpected register in operand! Should be DF.");
6306 NewMI->getOperand(3).setIsUndef();
6307
6308 MIB->setDesc(get(Is64Bit ? X86::POP64r : X86::POP32r));
6309 return true;
6310 }
6311
6312 case X86::WRFLAGS32:
6313 case X86::WRFLAGS64: {
6314 unsigned Is64Bit = MI.getOpcode() == X86::WRFLAGS64;
6315 MachineBasicBlock &MBB = *MIB->getParent();
6316
6317 BuildMI(MBB, MI, MIB->getDebugLoc(),
6318 get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
6319 .addReg(MI.getOperand(0).getReg());
6320 BuildMI(MBB, MI, MIB->getDebugLoc(),
6321 get(Is64Bit ? X86::POPF64 : X86::POPF32));
6322 MI.eraseFromParent();
6323 return true;
6324 }
6325
6326 // KNL does not recognize dependency-breaking idioms for mask registers,
6327 // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
6328 // Using %k0 as the undef input register is a performance heuristic based
6329 // on the assumption that %k0 is used less frequently than the other mask
6330 // registers, since it is not usable as a write mask.
6331 // FIXME: A more advanced approach would be to choose the best input mask
6332 // register based on context.
6333 case X86::KSET0W:
6334 return Expand2AddrKreg(MIB, get(X86::KXORWkk), X86::K0);
6335 case X86::KSET0D:
6336 return Expand2AddrKreg(MIB, get(X86::KXORDkk), X86::K0);
6337 case X86::KSET0Q:
6338 return Expand2AddrKreg(MIB, get(X86::KXORQkk), X86::K0);
6339 case X86::KSET1W:
6340 return Expand2AddrKreg(MIB, get(X86::KXNORWkk), X86::K0);
6341 case X86::KSET1D:
6342 return Expand2AddrKreg(MIB, get(X86::KXNORDkk), X86::K0);
6343 case X86::KSET1Q:
6344 return Expand2AddrKreg(MIB, get(X86::KXNORQkk), X86::K0);
6345 case TargetOpcode::LOAD_STACK_GUARD:
6346 expandLoadStackGuard(MIB, *this);
6347 return true;
6348 case X86::XOR64_FP:
6349 case X86::XOR32_FP:
6350 return expandXorFP(MIB, *this);
6351 case X86::SHLDROT32ri:
6352 return expandSHXDROT(MIB, get(X86::SHLD32rri8));
6353 case X86::SHLDROT64ri:
6354 return expandSHXDROT(MIB, get(X86::SHLD64rri8));
6355 case X86::SHRDROT32ri:
6356 return expandSHXDROT(MIB, get(X86::SHRD32rri8));
6357 case X86::SHRDROT64ri:
6358 return expandSHXDROT(MIB, get(X86::SHRD64rri8));
6359 case X86::ADD8rr_DB:
6360 MIB->setDesc(get(X86::OR8rr));
6361 break;
6362 case X86::ADD16rr_DB:
6363 MIB->setDesc(get(X86::OR16rr));
6364 break;
6365 case X86::ADD32rr_DB:
6366 MIB->setDesc(get(X86::OR32rr));
6367 break;
6368 case X86::ADD64rr_DB:
6369 MIB->setDesc(get(X86::OR64rr));
6370 break;
6371 case X86::ADD8ri_DB:
6372 MIB->setDesc(get(X86::OR8ri));
6373 break;
6374 case X86::ADD16ri_DB:
6375 MIB->setDesc(get(X86::OR16ri));
6376 break;
6377 case X86::ADD32ri_DB:
6378 MIB->setDesc(get(X86::OR32ri));
6379 break;
6380 case X86::ADD64ri32_DB:
6381 MIB->setDesc(get(X86::OR64ri32));
6382 break;
6383 }
6384 return false;
6385}
6386
6387/// Return true for all instructions that only update
6388/// the first 32 or 64 bits of the destination register and leave the rest
6389/// unmodified. This can be used to avoid folding loads if the instructions
6390/// only update part of the destination register, and the non-updated part is
6391/// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these
6392/// instructions breaks the partial register dependency and can improve
6393/// performance. e.g.:
6394///
6395/// movss (%rdi), %xmm0
6396/// cvtss2sd %xmm0, %xmm0
6397///
6398/// Instead of
6399/// cvtss2sd (%rdi), %xmm0
6400///
6401/// FIXME: This should be turned into a TSFlags.
6402///
6403static bool hasPartialRegUpdate(unsigned Opcode, const X86Subtarget &Subtarget,
6404 bool ForLoadFold = false) {
6405 switch (Opcode) {
6406 case X86::CVTSI2SSrr:
6407 case X86::CVTSI2SSrm:
6408 case X86::CVTSI642SSrr:
6409 case X86::CVTSI642SSrm:
6410 case X86::CVTSI2SDrr:
6411 case X86::CVTSI2SDrm:
6412 case X86::CVTSI642SDrr:
6413 case X86::CVTSI642SDrm:
6414 // Load folding won't affect the undef register update since the input is
6415 // a GPR.
6416 return !ForLoadFold;
6417 case X86::CVTSD2SSrr:
6418 case X86::CVTSD2SSrm:
6419 case X86::CVTSS2SDrr:
6420 case X86::CVTSS2SDrm:
6421 case X86::MOVHPDrm:
6422 case X86::MOVHPSrm:
6423 case X86::MOVLPDrm:
6424 case X86::MOVLPSrm:
6425 case X86::RCPSSr:
6426 case X86::RCPSSm:
6427 case X86::RCPSSr_Int:
6428 case X86::RCPSSm_Int:
6429 case X86::ROUNDSDri:
6430 case X86::ROUNDSDmi:
6431 case X86::ROUNDSSri:
6432 case X86::ROUNDSSmi:
6433 case X86::RSQRTSSr:
6434 case X86::RSQRTSSm:
6435 case X86::RSQRTSSr_Int:
6436 case X86::RSQRTSSm_Int:
6437 case X86::SQRTSSr:
6438 case X86::SQRTSSm:
6439 case X86::SQRTSSr_Int:
6440 case X86::SQRTSSm_Int:
6441 case X86::SQRTSDr:
6442 case X86::SQRTSDm:
6443 case X86::SQRTSDr_Int:
6444 case X86::SQRTSDm_Int:
6445 return true;
6446 case X86::VFCMULCPHZ128rm:
6447 case X86::VFCMULCPHZ128rmb:
6448 case X86::VFCMULCPHZ128rmbkz:
6449 case X86::VFCMULCPHZ128rmkz:
6450 case X86::VFCMULCPHZ128rr:
6451 case X86::VFCMULCPHZ128rrkz:
6452 case X86::VFCMULCPHZ256rm:
6453 case X86::VFCMULCPHZ256rmb:
6454 case X86::VFCMULCPHZ256rmbkz:
6455 case X86::VFCMULCPHZ256rmkz:
6456 case X86::VFCMULCPHZ256rr:
6457 case X86::VFCMULCPHZ256rrkz:
6458 case X86::VFCMULCPHZrm:
6459 case X86::VFCMULCPHZrmb:
6460 case X86::VFCMULCPHZrmbkz:
6461 case X86::VFCMULCPHZrmkz:
6462 case X86::VFCMULCPHZrr:
6463 case X86::VFCMULCPHZrrb:
6464 case X86::VFCMULCPHZrrbkz:
6465 case X86::VFCMULCPHZrrkz:
6466 case X86::VFMULCPHZ128rm:
6467 case X86::VFMULCPHZ128rmb:
6468 case X86::VFMULCPHZ128rmbkz:
6469 case X86::VFMULCPHZ128rmkz:
6470 case X86::VFMULCPHZ128rr:
6471 case X86::VFMULCPHZ128rrkz:
6472 case X86::VFMULCPHZ256rm:
6473 case X86::VFMULCPHZ256rmb:
6474 case X86::VFMULCPHZ256rmbkz:
6475 case X86::VFMULCPHZ256rmkz:
6476 case X86::VFMULCPHZ256rr:
6477 case X86::VFMULCPHZ256rrkz:
6478 case X86::VFMULCPHZrm:
6479 case X86::VFMULCPHZrmb:
6480 case X86::VFMULCPHZrmbkz:
6481 case X86::VFMULCPHZrmkz:
6482 case X86::VFMULCPHZrr:
6483 case X86::VFMULCPHZrrb:
6484 case X86::VFMULCPHZrrbkz:
6485 case X86::VFMULCPHZrrkz:
6486 case X86::VFCMULCSHZrm:
6487 case X86::VFCMULCSHZrmkz:
6488 case X86::VFCMULCSHZrr:
6489 case X86::VFCMULCSHZrrb:
6490 case X86::VFCMULCSHZrrbkz:
6491 case X86::VFCMULCSHZrrkz:
6492 case X86::VFMULCSHZrm:
6493 case X86::VFMULCSHZrmkz:
6494 case X86::VFMULCSHZrr:
6495 case X86::VFMULCSHZrrb:
6496 case X86::VFMULCSHZrrbkz:
6497 case X86::VFMULCSHZrrkz:
6498 return Subtarget.hasMULCFalseDeps();
6499 case X86::VPERMDYrm:
6500 case X86::VPERMDYrr:
6501 case X86::VPERMQYmi:
6502 case X86::VPERMQYri:
6503 case X86::VPERMPSYrm:
6504 case X86::VPERMPSYrr:
6505 case X86::VPERMPDYmi:
6506 case X86::VPERMPDYri:
6507 case X86::VPERMDZ256rm:
6508 case X86::VPERMDZ256rmb:
6509 case X86::VPERMDZ256rmbkz:
6510 case X86::VPERMDZ256rmkz:
6511 case X86::VPERMDZ256rr:
6512 case X86::VPERMDZ256rrkz:
6513 case X86::VPERMDZrm:
6514 case X86::VPERMDZrmb:
6515 case X86::VPERMDZrmbkz:
6516 case X86::VPERMDZrmkz:
6517 case X86::VPERMDZrr:
6518 case X86::VPERMDZrrkz:
6519 case X86::VPERMQZ256mbi:
6520 case X86::VPERMQZ256mbikz:
6521 case X86::VPERMQZ256mi:
6522 case X86::VPERMQZ256mikz:
6523 case X86::VPERMQZ256ri:
6524 case X86::VPERMQZ256rikz:
6525 case X86::VPERMQZ256rm:
6526 case X86::VPERMQZ256rmb:
6527 case X86::VPERMQZ256rmbkz:
6528 case X86::VPERMQZ256rmkz:
6529 case X86::VPERMQZ256rr:
6530 case X86::VPERMQZ256rrkz:
6531 case X86::VPERMQZmbi:
6532 case X86::VPERMQZmbikz:
6533 case X86::VPERMQZmi:
6534 case X86::VPERMQZmikz:
6535 case X86::VPERMQZri:
6536 case X86::VPERMQZrikz:
6537 case X86::VPERMQZrm:
6538 case X86::VPERMQZrmb:
6539 case X86::VPERMQZrmbkz:
6540 case X86::VPERMQZrmkz:
6541 case X86::VPERMQZrr:
6542 case X86::VPERMQZrrkz:
6543 case X86::VPERMPSZ256rm:
6544 case X86::VPERMPSZ256rmb:
6545 case X86::VPERMPSZ256rmbkz:
6546 case X86::VPERMPSZ256rmkz:
6547 case X86::VPERMPSZ256rr:
6548 case X86::VPERMPSZ256rrkz:
6549 case X86::VPERMPSZrm:
6550 case X86::VPERMPSZrmb:
6551 case X86::VPERMPSZrmbkz:
6552 case X86::VPERMPSZrmkz:
6553 case X86::VPERMPSZrr:
6554 case X86::VPERMPSZrrkz:
6555 case X86::VPERMPDZ256mbi:
6556 case X86::VPERMPDZ256mbikz:
6557 case X86::VPERMPDZ256mi:
6558 case X86::VPERMPDZ256mikz:
6559 case X86::VPERMPDZ256ri:
6560 case X86::VPERMPDZ256rikz:
6561 case X86::VPERMPDZ256rm:
6562 case X86::VPERMPDZ256rmb:
6563 case X86::VPERMPDZ256rmbkz:
6564 case X86::VPERMPDZ256rmkz:
6565 case X86::VPERMPDZ256rr:
6566 case X86::VPERMPDZ256rrkz:
6567 case X86::VPERMPDZmbi:
6568 case X86::VPERMPDZmbikz:
6569 case X86::VPERMPDZmi:
6570 case X86::VPERMPDZmikz:
6571 case X86::VPERMPDZri:
6572 case X86::VPERMPDZrikz:
6573 case X86::VPERMPDZrm:
6574 case X86::VPERMPDZrmb:
6575 case X86::VPERMPDZrmbkz:
6576 case X86::VPERMPDZrmkz:
6577 case X86::VPERMPDZrr:
6578 case X86::VPERMPDZrrkz:
6579 return Subtarget.hasPERMFalseDeps();
6580 case X86::VRANGEPDZ128rmbi:
6581 case X86::VRANGEPDZ128rmbikz:
6582 case X86::VRANGEPDZ128rmi:
6583 case X86::VRANGEPDZ128rmikz:
6584 case X86::VRANGEPDZ128rri:
6585 case X86::VRANGEPDZ128rrikz:
6586 case X86::VRANGEPDZ256rmbi:
6587 case X86::VRANGEPDZ256rmbikz:
6588 case X86::VRANGEPDZ256rmi:
6589 case X86::VRANGEPDZ256rmikz:
6590 case X86::VRANGEPDZ256rri:
6591 case X86::VRANGEPDZ256rrikz:
6592 case X86::VRANGEPDZrmbi:
6593 case X86::VRANGEPDZrmbikz:
6594 case X86::VRANGEPDZrmi:
6595 case X86::VRANGEPDZrmikz:
6596 case X86::VRANGEPDZrri:
6597 case X86::VRANGEPDZrrib:
6598 case X86::VRANGEPDZrribkz:
6599 case X86::VRANGEPDZrrikz:
6600 case X86::VRANGEPSZ128rmbi:
6601 case X86::VRANGEPSZ128rmbikz:
6602 case X86::VRANGEPSZ128rmi:
6603 case X86::VRANGEPSZ128rmikz:
6604 case X86::VRANGEPSZ128rri:
6605 case X86::VRANGEPSZ128rrikz:
6606 case X86::VRANGEPSZ256rmbi:
6607 case X86::VRANGEPSZ256rmbikz:
6608 case X86::VRANGEPSZ256rmi:
6609 case X86::VRANGEPSZ256rmikz:
6610 case X86::VRANGEPSZ256rri:
6611 case X86::VRANGEPSZ256rrikz:
6612 case X86::VRANGEPSZrmbi:
6613 case X86::VRANGEPSZrmbikz:
6614 case X86::VRANGEPSZrmi:
6615 case X86::VRANGEPSZrmikz:
6616 case X86::VRANGEPSZrri:
6617 case X86::VRANGEPSZrrib:
6618 case X86::VRANGEPSZrribkz:
6619 case X86::VRANGEPSZrrikz:
6620 case X86::VRANGESDZrmi:
6621 case X86::VRANGESDZrmikz:
6622 case X86::VRANGESDZrri:
6623 case X86::VRANGESDZrrib:
6624 case X86::VRANGESDZrribkz:
6625 case X86::VRANGESDZrrikz:
6626 case X86::VRANGESSZrmi:
6627 case X86::VRANGESSZrmikz:
6628 case X86::VRANGESSZrri:
6629 case X86::VRANGESSZrrib:
6630 case X86::VRANGESSZrribkz:
6631 case X86::VRANGESSZrrikz:
6632 return Subtarget.hasRANGEFalseDeps();
6633 case X86::VGETMANTSSZrmi:
6634 case X86::VGETMANTSSZrmikz:
6635 case X86::VGETMANTSSZrri:
6636 case X86::VGETMANTSSZrrib:
6637 case X86::VGETMANTSSZrribkz:
6638 case X86::VGETMANTSSZrrikz:
6639 case X86::VGETMANTSDZrmi:
6640 case X86::VGETMANTSDZrmikz:
6641 case X86::VGETMANTSDZrri:
6642 case X86::VGETMANTSDZrrib:
6643 case X86::VGETMANTSDZrribkz:
6644 case X86::VGETMANTSDZrrikz:
6645 case X86::VGETMANTSHZrmi:
6646 case X86::VGETMANTSHZrmikz:
6647 case X86::VGETMANTSHZrri:
6648 case X86::VGETMANTSHZrrib:
6649 case X86::VGETMANTSHZrribkz:
6650 case X86::VGETMANTSHZrrikz:
6651 case X86::VGETMANTPSZ128rmbi:
6652 case X86::VGETMANTPSZ128rmbikz:
6653 case X86::VGETMANTPSZ128rmi:
6654 case X86::VGETMANTPSZ128rmikz:
6655 case X86::VGETMANTPSZ256rmbi:
6656 case X86::VGETMANTPSZ256rmbikz:
6657 case X86::VGETMANTPSZ256rmi:
6658 case X86::VGETMANTPSZ256rmikz:
6659 case X86::VGETMANTPSZrmbi:
6660 case X86::VGETMANTPSZrmbikz:
6661 case X86::VGETMANTPSZrmi:
6662 case X86::VGETMANTPSZrmikz:
6663 case X86::VGETMANTPDZ128rmbi:
6664 case X86::VGETMANTPDZ128rmbikz:
6665 case X86::VGETMANTPDZ128rmi:
6666 case X86::VGETMANTPDZ128rmikz:
6667 case X86::VGETMANTPDZ256rmbi:
6668 case X86::VGETMANTPDZ256rmbikz:
6669 case X86::VGETMANTPDZ256rmi:
6670 case X86::VGETMANTPDZ256rmikz:
6671 case X86::VGETMANTPDZrmbi:
6672 case X86::VGETMANTPDZrmbikz:
6673 case X86::VGETMANTPDZrmi:
6674 case X86::VGETMANTPDZrmikz:
6675 return Subtarget.hasGETMANTFalseDeps();
6676 case X86::VPMULLQZ128rm:
6677 case X86::VPMULLQZ128rmb:
6678 case X86::VPMULLQZ128rmbkz:
6679 case X86::VPMULLQZ128rmkz:
6680 case X86::VPMULLQZ128rr:
6681 case X86::VPMULLQZ128rrkz:
6682 case X86::VPMULLQZ256rm:
6683 case X86::VPMULLQZ256rmb:
6684 case X86::VPMULLQZ256rmbkz:
6685 case X86::VPMULLQZ256rmkz:
6686 case X86::VPMULLQZ256rr:
6687 case X86::VPMULLQZ256rrkz:
6688 case X86::VPMULLQZrm:
6689 case X86::VPMULLQZrmb:
6690 case X86::VPMULLQZrmbkz:
6691 case X86::VPMULLQZrmkz:
6692 case X86::VPMULLQZrr:
6693 case X86::VPMULLQZrrkz:
6694 return Subtarget.hasMULLQFalseDeps();
6695 // GPR
6696 case X86::POPCNT32rm:
6697 case X86::POPCNT32rr:
6698 case X86::POPCNT64rm:
6699 case X86::POPCNT64rr:
6700 return Subtarget.hasPOPCNTFalseDeps();
6701 case X86::LZCNT32rm:
6702 case X86::LZCNT32rr:
6703 case X86::LZCNT64rm:
6704 case X86::LZCNT64rr:
6705 case X86::TZCNT32rm:
6706 case X86::TZCNT32rr:
6707 case X86::TZCNT64rm:
6708 case X86::TZCNT64rr:
6709 return Subtarget.hasLZCNTFalseDeps();
6710 }
6711
6712 return false;
6713}
6714
6715/// Inform the BreakFalseDeps pass how many idle
6716/// instructions we would like before a partial register update.
6717unsigned X86InstrInfo::getPartialRegUpdateClearance(
6718 const MachineInstr &MI, unsigned OpNum,
6719 const TargetRegisterInfo *TRI) const {
6720 if (OpNum != 0 || !hasPartialRegUpdate(MI.getOpcode(), Subtarget))
6721 return 0;
6722
6723 // If MI is marked as reading Reg, the partial register update is wanted.
6724 const MachineOperand &MO = MI.getOperand(0);
6725 Register Reg = MO.getReg();
6726 if (Reg.isVirtual()) {
6727 if (MO.readsReg() || MI.readsVirtualRegister(Reg))
6728 return 0;
6729 } else {
6730 if (MI.readsRegister(Reg, TRI))
6731 return 0;
6732 }
6733
6734 // If any instructions in the clearance range are reading Reg, insert a
6735 // dependency-breaking instruction, which is inexpensive and is likely to
6736 // be hidden in other instructions' cycles.
6737 return PartialRegUpdateClearance;
6738}
6739
6740// Return true for any instruction that copies the high bits of the first source
6741// operand into the unused high bits of the destination operand.
6742// Also returns true for instructions that have two inputs where one may
6743// be undef and we want it to use the same register as the other input.
6744static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
6745 bool ForLoadFold = false) {
6746 // Set the OpNum parameter to the first source operand.
6747 switch (Opcode) {
6748 case X86::MMX_PUNPCKHBWrr:
6749 case X86::MMX_PUNPCKHWDrr:
6750 case X86::MMX_PUNPCKHDQrr:
6751 case X86::MMX_PUNPCKLBWrr:
6752 case X86::MMX_PUNPCKLWDrr:
6753 case X86::MMX_PUNPCKLDQrr:
6754 case X86::MOVHLPSrr:
6755 case X86::PACKSSWBrr:
6756 case X86::PACKUSWBrr:
6757 case X86::PACKSSDWrr:
6758 case X86::PACKUSDWrr:
6759 case X86::PUNPCKHBWrr:
6760 case X86::PUNPCKLBWrr:
6761 case X86::PUNPCKHWDrr:
6762 case X86::PUNPCKLWDrr:
6763 case X86::PUNPCKHDQrr:
6764 case X86::PUNPCKLDQrr:
6765 case X86::PUNPCKHQDQrr:
6766 case X86::PUNPCKLQDQrr:
6767 case X86::SHUFPDrri:
6768 case X86::SHUFPSrri:
6769 // These instructions are sometimes used with an undef first or second
6770 // source. Return true here so BreakFalseDeps will assign this source to the
6771 // same register as the first source to avoid a false dependency.
6772 // Operand 1 of these instructions is tied so they're separate from their
6773 // VEX counterparts.
6774 return OpNum == 2 && !ForLoadFold;
6775
6776 case X86::VMOVLHPSrr:
6777 case X86::VMOVLHPSZrr:
6778 case X86::VPACKSSWBrr:
6779 case X86::VPACKUSWBrr:
6780 case X86::VPACKSSDWrr:
6781 case X86::VPACKUSDWrr:
6782 case X86::VPACKSSWBZ128rr:
6783 case X86::VPACKUSWBZ128rr:
6784 case X86::VPACKSSDWZ128rr:
6785 case X86::VPACKUSDWZ128rr:
6786 case X86::VPERM2F128rri:
6787 case X86::VPERM2I128rri:
6788 case X86::VSHUFF32X4Z256rri:
6789 case X86::VSHUFF32X4Zrri:
6790 case X86::VSHUFF64X2Z256rri:
6791 case X86::VSHUFF64X2Zrri:
6792 case X86::VSHUFI32X4Z256rri:
6793 case X86::VSHUFI32X4Zrri:
6794 case X86::VSHUFI64X2Z256rri:
6795 case X86::VSHUFI64X2Zrri:
6796 case X86::VPUNPCKHBWrr:
6797 case X86::VPUNPCKLBWrr:
6798 case X86::VPUNPCKHBWYrr:
6799 case X86::VPUNPCKLBWYrr:
6800 case X86::VPUNPCKHBWZ128rr:
6801 case X86::VPUNPCKLBWZ128rr:
6802 case X86::VPUNPCKHBWZ256rr:
6803 case X86::VPUNPCKLBWZ256rr:
6804 case X86::VPUNPCKHBWZrr:
6805 case X86::VPUNPCKLBWZrr:
6806 case X86::VPUNPCKHWDrr:
6807 case X86::VPUNPCKLWDrr:
6808 case X86::VPUNPCKHWDYrr:
6809 case X86::VPUNPCKLWDYrr:
6810 case X86::VPUNPCKHWDZ128rr:
6811 case X86::VPUNPCKLWDZ128rr:
6812 case X86::VPUNPCKHWDZ256rr:
6813 case X86::VPUNPCKLWDZ256rr:
6814 case X86::VPUNPCKHWDZrr:
6815 case X86::VPUNPCKLWDZrr:
6816 case X86::VPUNPCKHDQrr:
6817 case X86::VPUNPCKLDQrr:
6818 case X86::VPUNPCKHDQYrr:
6819 case X86::VPUNPCKLDQYrr:
6820 case X86::VPUNPCKHDQZ128rr:
6821 case X86::VPUNPCKLDQZ128rr:
6822 case X86::VPUNPCKHDQZ256rr:
6823 case X86::VPUNPCKLDQZ256rr:
6824 case X86::VPUNPCKHDQZrr:
6825 case X86::VPUNPCKLDQZrr:
6826 case X86::VPUNPCKHQDQrr:
6827 case X86::VPUNPCKLQDQrr:
6828 case X86::VPUNPCKHQDQYrr:
6829 case X86::VPUNPCKLQDQYrr:
6830 case X86::VPUNPCKHQDQZ128rr:
6831 case X86::VPUNPCKLQDQZ128rr:
6832 case X86::VPUNPCKHQDQZ256rr:
6833 case X86::VPUNPCKLQDQZ256rr:
6834 case X86::VPUNPCKHQDQZrr:
6835 case X86::VPUNPCKLQDQZrr:
6836 // These instructions are sometimes used with an undef first or second
6837 // source. Return true here so BreakFalseDeps will assign this source to the
6838 // same register as the first source to avoid a false dependency.
6839 return (OpNum == 1 || OpNum == 2) && !ForLoadFold;
6840
6841 case X86::VCVTSI2SSrr:
6842 case X86::VCVTSI2SSrm:
6843 case X86::VCVTSI2SSrr_Int:
6844 case X86::VCVTSI2SSrm_Int:
6845 case X86::VCVTSI642SSrr:
6846 case X86::VCVTSI642SSrm:
6847 case X86::VCVTSI642SSrr_Int:
6848 case X86::VCVTSI642SSrm_Int:
6849 case X86::VCVTSI2SDrr:
6850 case X86::VCVTSI2SDrm:
6851 case X86::VCVTSI2SDrr_Int:
6852 case X86::VCVTSI2SDrm_Int:
6853 case X86::VCVTSI642SDrr:
6854 case X86::VCVTSI642SDrm:
6855 case X86::VCVTSI642SDrr_Int:
6856 case X86::VCVTSI642SDrm_Int:
6857 // AVX-512
6858 case X86::VCVTSI2SSZrr:
6859 case X86::VCVTSI2SSZrm:
6860 case X86::VCVTSI2SSZrr_Int:
6861 case X86::VCVTSI2SSZrrb_Int:
6862 case X86::VCVTSI2SSZrm_Int:
6863 case X86::VCVTSI642SSZrr:
6864 case X86::VCVTSI642SSZrm:
6865 case X86::VCVTSI642SSZrr_Int:
6866 case X86::VCVTSI642SSZrrb_Int:
6867 case X86::VCVTSI642SSZrm_Int:
6868 case X86::VCVTSI2SDZrr:
6869 case X86::VCVTSI2SDZrm:
6870 case X86::VCVTSI2SDZrr_Int:
6871 case X86::VCVTSI2SDZrm_Int:
6872 case X86::VCVTSI642SDZrr:
6873 case X86::VCVTSI642SDZrm:
6874 case X86::VCVTSI642SDZrr_Int:
6875 case X86::VCVTSI642SDZrrb_Int:
6876 case X86::VCVTSI642SDZrm_Int:
6877 case X86::VCVTUSI2SSZrr:
6878 case X86::VCVTUSI2SSZrm:
6879 case X86::VCVTUSI2SSZrr_Int:
6880 case X86::VCVTUSI2SSZrrb_Int:
6881 case X86::VCVTUSI2SSZrm_Int:
6882 case X86::VCVTUSI642SSZrr:
6883 case X86::VCVTUSI642SSZrm:
6884 case X86::VCVTUSI642SSZrr_Int:
6885 case X86::VCVTUSI642SSZrrb_Int:
6886 case X86::VCVTUSI642SSZrm_Int:
6887 case X86::VCVTUSI2SDZrr:
6888 case X86::VCVTUSI2SDZrm:
6889 case X86::VCVTUSI2SDZrr_Int:
6890 case X86::VCVTUSI2SDZrm_Int:
6891 case X86::VCVTUSI642SDZrr:
6892 case X86::VCVTUSI642SDZrm:
6893 case X86::VCVTUSI642SDZrr_Int:
6894 case X86::VCVTUSI642SDZrrb_Int:
6895 case X86::VCVTUSI642SDZrm_Int:
6896 case X86::VCVTSI2SHZrr:
6897 case X86::VCVTSI2SHZrm:
6898 case X86::VCVTSI2SHZrr_Int:
6899 case X86::VCVTSI2SHZrrb_Int:
6900 case X86::VCVTSI2SHZrm_Int:
6901 case X86::VCVTSI642SHZrr:
6902 case X86::VCVTSI642SHZrm:
6903 case X86::VCVTSI642SHZrr_Int:
6904 case X86::VCVTSI642SHZrrb_Int:
6905 case X86::VCVTSI642SHZrm_Int:
6906 case X86::VCVTUSI2SHZrr:
6907 case X86::VCVTUSI2SHZrm:
6908 case X86::VCVTUSI2SHZrr_Int:
6909 case X86::VCVTUSI2SHZrrb_Int:
6910 case X86::VCVTUSI2SHZrm_Int:
6911 case X86::VCVTUSI642SHZrr:
6912 case X86::VCVTUSI642SHZrm:
6913 case X86::VCVTUSI642SHZrr_Int:
6914 case X86::VCVTUSI642SHZrrb_Int:
6915 case X86::VCVTUSI642SHZrm_Int:
6916 // Load folding won't affect the undef register update since the input is
6917 // a GPR.
6918 return OpNum == 1 && !ForLoadFold;
6919 case X86::VCVTSD2SSrr:
6920 case X86::VCVTSD2SSrm:
6921 case X86::VCVTSD2SSrr_Int:
6922 case X86::VCVTSD2SSrm_Int:
6923 case X86::VCVTSS2SDrr:
6924 case X86::VCVTSS2SDrm:
6925 case X86::VCVTSS2SDrr_Int:
6926 case X86::VCVTSS2SDrm_Int:
6927 case X86::VRCPSSr:
6928 case X86::VRCPSSr_Int:
6929 case X86::VRCPSSm:
6930 case X86::VRCPSSm_Int:
6931 case X86::VROUNDSDri:
6932 case X86::VROUNDSDmi:
6933 case X86::VROUNDSDri_Int:
6934 case X86::VROUNDSDmi_Int:
6935 case X86::VROUNDSSri:
6936 case X86::VROUNDSSmi:
6937 case X86::VROUNDSSri_Int:
6938 case X86::VROUNDSSmi_Int:
6939 case X86::VRSQRTSSr:
6940 case X86::VRSQRTSSr_Int:
6941 case X86::VRSQRTSSm:
6942 case X86::VRSQRTSSm_Int:
6943 case X86::VSQRTSSr:
6944 case X86::VSQRTSSr_Int:
6945 case X86::VSQRTSSm:
6946 case X86::VSQRTSSm_Int:
6947 case X86::VSQRTSDr:
6948 case X86::VSQRTSDr_Int:
6949 case X86::VSQRTSDm:
6950 case X86::VSQRTSDm_Int:
6951 // AVX-512
6952 case X86::VCVTSD2SSZrr:
6953 case X86::VCVTSD2SSZrr_Int:
6954 case X86::VCVTSD2SSZrrb_Int:
6955 case X86::VCVTSD2SSZrm:
6956 case X86::VCVTSD2SSZrm_Int:
6957 case X86::VCVTSS2SDZrr:
6958 case X86::VCVTSS2SDZrr_Int:
6959 case X86::VCVTSS2SDZrrb_Int:
6960 case X86::VCVTSS2SDZrm:
6961 case X86::VCVTSS2SDZrm_Int:
6962 case X86::VGETEXPSDZr:
6963 case X86::VGETEXPSDZrb:
6964 case X86::VGETEXPSDZm:
6965 case X86::VGETEXPSSZr:
6966 case X86::VGETEXPSSZrb:
6967 case X86::VGETEXPSSZm:
6968 case X86::VGETMANTSDZrri:
6969 case X86::VGETMANTSDZrrib:
6970 case X86::VGETMANTSDZrmi:
6971 case X86::VGETMANTSSZrri:
6972 case X86::VGETMANTSSZrrib:
6973 case X86::VGETMANTSSZrmi:
6974 case X86::VRNDSCALESDZrri:
6975 case X86::VRNDSCALESDZrri_Int:
6976 case X86::VRNDSCALESDZrrib_Int:
6977 case X86::VRNDSCALESDZrmi:
6978 case X86::VRNDSCALESDZrmi_Int:
6979 case X86::VRNDSCALESSZrri:
6980 case X86::VRNDSCALESSZrri_Int:
6981 case X86::VRNDSCALESSZrrib_Int:
6982 case X86::VRNDSCALESSZrmi:
6983 case X86::VRNDSCALESSZrmi_Int:
6984 case X86::VRCP14SDZrr:
6985 case X86::VRCP14SDZrm:
6986 case X86::VRCP14SSZrr:
6987 case X86::VRCP14SSZrm:
6988 case X86::VRCPSHZrr:
6989 case X86::VRCPSHZrm:
6990 case X86::VRSQRTSHZrr:
6991 case X86::VRSQRTSHZrm:
6992 case X86::VREDUCESHZrmi:
6993 case X86::VREDUCESHZrri:
6994 case X86::VREDUCESHZrrib:
6995 case X86::VGETEXPSHZr:
6996 case X86::VGETEXPSHZrb:
6997 case X86::VGETEXPSHZm:
6998 case X86::VGETMANTSHZrri:
6999 case X86::VGETMANTSHZrrib:
7000 case X86::VGETMANTSHZrmi:
7001 case X86::VRNDSCALESHZrri:
7002 case X86::VRNDSCALESHZrri_Int:
7003 case X86::VRNDSCALESHZrrib_Int:
7004 case X86::VRNDSCALESHZrmi:
7005 case X86::VRNDSCALESHZrmi_Int:
7006 case X86::VSQRTSHZr:
7007 case X86::VSQRTSHZr_Int:
7008 case X86::VSQRTSHZrb_Int:
7009 case X86::VSQRTSHZm:
7010 case X86::VSQRTSHZm_Int:
7011 case X86::VRCP28SDZr:
7012 case X86::VRCP28SDZrb:
7013 case X86::VRCP28SDZm:
7014 case X86::VRCP28SSZr:
7015 case X86::VRCP28SSZrb:
7016 case X86::VRCP28SSZm:
7017 case X86::VREDUCESSZrmi:
7018 case X86::VREDUCESSZrri:
7019 case X86::VREDUCESSZrrib:
7020 case X86::VRSQRT14SDZrr:
7021 case X86::VRSQRT14SDZrm:
7022 case X86::VRSQRT14SSZrr:
7023 case X86::VRSQRT14SSZrm:
7024 case X86::VRSQRT28SDZr:
7025 case X86::VRSQRT28SDZrb:
7026 case X86::VRSQRT28SDZm:
7027 case X86::VRSQRT28SSZr:
7028 case X86::VRSQRT28SSZrb:
7029 case X86::VRSQRT28SSZm:
7030 case X86::VSQRTSSZr:
7031 case X86::VSQRTSSZr_Int:
7032 case X86::VSQRTSSZrb_Int:
7033 case X86::VSQRTSSZm:
7034 case X86::VSQRTSSZm_Int:
7035 case X86::VSQRTSDZr:
7036 case X86::VSQRTSDZr_Int:
7037 case X86::VSQRTSDZrb_Int:
7038 case X86::VSQRTSDZm:
7039 case X86::VSQRTSDZm_Int:
7040 case X86::VCVTSD2SHZrr:
7041 case X86::VCVTSD2SHZrr_Int:
7042 case X86::VCVTSD2SHZrrb_Int:
7043 case X86::VCVTSD2SHZrm:
7044 case X86::VCVTSD2SHZrm_Int:
7045 case X86::VCVTSS2SHZrr:
7046 case X86::VCVTSS2SHZrr_Int:
7047 case X86::VCVTSS2SHZrrb_Int:
7048 case X86::VCVTSS2SHZrm:
7049 case X86::VCVTSS2SHZrm_Int:
7050 case X86::VCVTSH2SDZrr:
7051 case X86::VCVTSH2SDZrr_Int:
7052 case X86::VCVTSH2SDZrrb_Int:
7053 case X86::VCVTSH2SDZrm:
7054 case X86::VCVTSH2SDZrm_Int:
7055 case X86::VCVTSH2SSZrr:
7056 case X86::VCVTSH2SSZrr_Int:
7057 case X86::VCVTSH2SSZrrb_Int:
7058 case X86::VCVTSH2SSZrm:
7059 case X86::VCVTSH2SSZrm_Int:
7060 return OpNum == 1;
7061 case X86::VMOVSSZrrk:
7062 case X86::VMOVSDZrrk:
7063 return OpNum == 3 && !ForLoadFold;
7064 case X86::VMOVSSZrrkz:
7065 case X86::VMOVSDZrrkz:
7066 return OpNum == 2 && !ForLoadFold;
7067 }
7068
7069 return false;
7070}
7071
7072/// Inform the BreakFalseDeps pass how many idle instructions we would like
7073/// before certain undef register reads.
7074///
7075/// This catches the VCVTSI2SD family of instructions:
7076///
7077/// vcvtsi2sdq %rax, undef %xmm0, %xmm14
7078///
7079/// We should be careful *not* to catch VXOR idioms, which are presumably
7080/// handled specially in the pipeline:
7081///
7082/// vxorps undef %xmm1, undef %xmm1, %xmm1
7083///
7084/// Like getPartialRegUpdateClearance, this makes a strong assumption that the
7085/// high bits that are passed-through are not live.
7086unsigned
7087X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned OpNum,
7088 const TargetRegisterInfo *TRI) const {
7089 const MachineOperand &MO = MI.getOperand(OpNum);
7090 if (MO.getReg().isPhysical() && hasUndefRegUpdate(MI.getOpcode(), OpNum))
7091 return UndefRegClearance;
7092
7093 return 0;
7094}
7095
7096void X86InstrInfo::breakPartialRegDependency(
7097 MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
7098 Register Reg = MI.getOperand(OpNum).getReg();
7099 // If MI kills this register, the false dependence is already broken.
7100 if (MI.killsRegister(Reg, TRI))
7101 return;
7102
7103 if (X86::VR128RegClass.contains(Reg)) {
7104 // These instructions are all floating point domain, so xorps is the best
7105 // choice.
7106 unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr;
7107 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(Opc), Reg)
7108 .addReg(Reg, RegState::Undef)
7109 .addReg(Reg, RegState::Undef);
7110 MI.addRegisterKilled(Reg, TRI, true);
7111 } else if (X86::VR256RegClass.contains(Reg)) {
7112 // Use vxorps to clear the full ymm register.
7113 // It wants to read and write the xmm sub-register.
7114 Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
7115 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg)
7116 .addReg(XReg, RegState::Undef)
7117 .addReg(XReg, RegState::Undef)
7118 .addReg(Reg, RegState::ImplicitDefine);
7119 MI.addRegisterKilled(Reg, TRI, true);
7120 } else if (X86::VR128XRegClass.contains(Reg)) {
7121 // Only handle VLX targets.
7122 if (!Subtarget.hasVLX())
7123 return;
7124 // Since vxorps requires AVX512DQ, vpxord should be the best choice.
7125 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr), Reg)
7126 .addReg(Reg, RegState::Undef)
7127 .addReg(Reg, RegState::Undef);
7128 MI.addRegisterKilled(Reg, TRI, true);
7129 } else if (X86::VR256XRegClass.contains(Reg) ||
7130 X86::VR512RegClass.contains(Reg)) {
7131 // Only handle VLX targets.
7132 if (!Subtarget.hasVLX())
7133 return;
7134 // Use vpxord to clear the full ymm/zmm register.
7135 // It wants to read and write the xmm sub-register.
7136 Register XReg = TRI->getSubReg(Reg, X86::sub_xmm);
7137 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VPXORDZ128rr), XReg)
7138 .addReg(XReg, RegState::Undef)
7139 .addReg(XReg, RegState::Undef)
7140 .addReg(Reg, RegState::ImplicitDefine);
7141 MI.addRegisterKilled(Reg, TRI, true);
7142 } else if (X86::GR64RegClass.contains(Reg)) {
7143 // Use XOR32rr because it has a shorter encoding and also zeros the upper
7144 // bits.
7145 Register XReg = TRI->getSubReg(Reg, X86::sub_32bit);
7146 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg)
7147 .addReg(XReg, RegState::Undef)
7148 .addReg(XReg, RegState::Undef)
7149 .addReg(Reg, RegState::ImplicitDefine);
7150 MI.addRegisterKilled(Reg, TRI, true);
7151 } else if (X86::GR32RegClass.contains(Reg)) {
7152 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), Reg)
7153 .addReg(Reg, RegState::Undef)
7154 .addReg(Reg, RegState::Undef);
7155 MI.addRegisterKilled(Reg, TRI, true);
7156 }
7157}
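// Illustrative sketch (not part of the upstream source): the BreakFalseDeps
// pass uses the hook above to insert a zeroing idiom in front of an
// instruction that would otherwise carry a false dependence on its
// destination register, e.g.
//
//   xorps %xmm0, %xmm0          ; inserted dependency breaker
//   cvtss2sd (%rdi), %xmm0      ; no longer waits on the previous %xmm0 value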
7158
7159static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
7160 int PtrOffset = 0) {
7161 unsigned NumAddrOps = MOs.size();
7162
7163 if (NumAddrOps < 4) {
7164 // FrameIndex only - add an immediate offset (whether it's zero or not).
7165 for (unsigned i = 0; i != NumAddrOps; ++i)
7166 MIB.add(MOs[i]);
7167 addOffset(MIB, PtrOffset);
7168 } else {
7169 // General Memory Addressing - we need to add any offset to an existing
7170 // offset.
7171 assert(MOs.size() == 5 && "Unexpected memory operand list length");
7172 for (unsigned i = 0; i != NumAddrOps; ++i) {
7173 const MachineOperand &MO = MOs[i];
7174 if (i == 3 && PtrOffset != 0) {
7175 MIB.addDisp(MO, PtrOffset);
7176 } else {
7177 MIB.add(MO);
7178 }
7179 }
7180 }
7181}
7182
7183static void updateOperandRegConstraints(MachineFunction &MF,
7184 MachineInstr &NewMI,
7185 const TargetInstrInfo &TII) {
7186 MachineRegisterInfo &MRI = MF.getRegInfo();
7187 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
7188
7189 for (int Idx : llvm::seq<int>(0, NewMI.getNumOperands())) {
7190 MachineOperand &MO = NewMI.getOperand(Idx);
7191 // We only need to update constraints on virtual register operands.
7192 if (!MO.isReg())
7193 continue;
7194 Register Reg = MO.getReg();
7195 if (!Reg.isVirtual())
7196 continue;
7197
7198 auto *NewRC = MRI.constrainRegClass(
7199 Reg, TII.getRegClass(NewMI.getDesc(), Idx, &TRI, MF));
7200 if (!NewRC) {
7201 LLVM_DEBUG(
7202 dbgs() << "WARNING: Unable to update register constraint for operand "
7203 << Idx << " of instruction:\n";
7204 NewMI.dump(); dbgs() << "\n");
7205 }
7206 }
7207}
7208
7209static MachineInstr *fuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
7210 ArrayRef<MachineOperand> MOs,
7211 MachineBasicBlock::iterator InsertPt,
7212 MachineInstr &MI,
7213 const TargetInstrInfo &TII) {
7214 // Create the base instruction with the memory operand as the first part.
7215 // Omit the implicit operands, something BuildMI can't do.
7216 MachineInstr *NewMI =
7217 MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true);
7218 MachineInstrBuilder MIB(MF, NewMI);
7219 addOperands(MIB, MOs);
7220
7221 // Loop over the rest of the ri operands, converting them over.
7222 unsigned NumOps = MI.getDesc().getNumOperands() - 2;
7223 for (unsigned i = 0; i != NumOps; ++i) {
7224 MachineOperand &MO = MI.getOperand(i + 2);
7225 MIB.add(MO);
7226 }
7227 for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), NumOps + 2))
7228 MIB.add(MO);
7229
7230 updateOperandRegConstraints(MF, *NewMI, TII);
7231
7232 MachineBasicBlock *MBB = InsertPt->getParent();
7233 MBB->insert(InsertPt, NewMI);
7234
7235 return MIB;
7236}
7237
7238static MachineInstr *fuseInst(MachineFunction &MF, unsigned Opcode,
7239 unsigned OpNo, ArrayRef<MachineOperand> MOs,
7240 MachineBasicBlock::iterator InsertPt,
7241 MachineInstr &MI, const TargetInstrInfo &TII,
7242 int PtrOffset = 0) {
7243 // Omit the implicit operands, something BuildMI can't do.
7244 MachineInstr *NewMI =
7245 MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true);
7246 MachineInstrBuilder MIB(MF, NewMI);
7247
7248 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
7249 MachineOperand &MO = MI.getOperand(i);
7250 if (i == OpNo) {
7251 assert(MO.isReg() && "Expected to fold into reg operand!");
7252 addOperands(MIB, MOs, PtrOffset);
7253 } else {
7254 MIB.add(MO);
7255 }
7256 }
7257
7258 updateOperandRegConstraints(MF, *NewMI, TII);
7259
7260 // Copy the NoFPExcept flag from the instruction we're fusing.
7261 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
7262 NewMI->setFlag(MachineInstr::MIFlag::NoFPExcept);
7263
7264 MachineBasicBlock *MBB = InsertPt->getParent();
7265 MBB->insert(InsertPt, NewMI);
7266
7267 return MIB;
7268}
7269
7270static MachineInstr *makeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
7271 ArrayRef<MachineOperand> MOs,
7272 MachineBasicBlock::iterator InsertPt,
7273 MachineInstr &MI) {
7274 MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
7275 MI.getDebugLoc(), TII.get(Opcode));
7276 addOperands(MIB, MOs);
7277 return MIB.addImm(0);
7278}
7279
7280MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
7281 MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
7282 ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
7283 unsigned Size, Align Alignment) const {
7284 switch (MI.getOpcode()) {
7285 case X86::INSERTPSrri:
7286 case X86::VINSERTPSrri:
7287 case X86::VINSERTPSZrri:
7288 // Attempt to convert the load of inserted vector into a fold load
7289 // of a single float.
7290 if (OpNum == 2) {
7291 unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
7292 unsigned ZMask = Imm & 15;
7293 unsigned DstIdx = (Imm >> 4) & 3;
7294 unsigned SrcIdx = (Imm >> 6) & 3;
7295
7296 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
7297 const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
7298 unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
7299 if ((Size == 0 || Size >= 16) && RCSize >= 16 &&
7300 (MI.getOpcode() != X86::INSERTPSrri || Alignment >= Align(4))) {
7301 int PtrOffset = SrcIdx * 4;
7302 unsigned NewImm = (DstIdx << 4) | ZMask;
7303 unsigned NewOpCode =
7304 (MI.getOpcode() == X86::VINSERTPSZrri) ? X86::VINSERTPSZrmi
7305 : (MI.getOpcode() == X86::VINSERTPSrri) ? X86::VINSERTPSrmi
7306 : X86::INSERTPSrmi;
7307 MachineInstr *NewMI =
7308 fuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset);
7309 NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm);
7310 return NewMI;
7311 }
7312 }
7313 break;
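// Worked example (illustrative, not from the upstream source): for
// VINSERTPSrri with imm = 0x98 (SrcIdx = 2, DstIdx = 1, ZMask = 0x8), the
// fold above reads the float directly from <addr> + 2 * 4 and rewrites the
// immediate to (DstIdx << 4) | ZMask = 0x18, since the source-element index
// is now encoded in the load address instead of the immediate.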
7314 case X86::MOVHLPSrr:
7315 case X86::VMOVHLPSrr:
7316 case X86::VMOVHLPSZrr:
7317 // Move the upper 64-bits of the second operand to the lower 64-bits.
7318 // To fold the load, adjust the pointer to the upper and use (V)MOVLPS.
7319 // TODO: In most cases AVX doesn't have an 8-byte alignment requirement.
7320 if (OpNum == 2) {
7321 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
7322 const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
7323 unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
7324 if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment >= Align(8)) {
7325 unsigned NewOpCode =
7326 (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm
7327 : (MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm
7328 : X86::MOVLPSrm;
7329 MachineInstr *NewMI =
7330 fuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8);
7331 return NewMI;
7332 }
7333 }
7334 break;
7335 case X86::UNPCKLPDrr:
7336 // If we won't be able to fold this to the memory form of UNPCKL, use
7337 // MOVHPD instead. Done as custom because we can't have this in the load
7338 // table twice.
7339 if (OpNum == 2) {
7340 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
7341 const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
7342 unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
7343 if ((Size == 0 || Size >= 16) && RCSize >= 16 && Alignment < Align(16)) {
7344 MachineInstr *NewMI =
7345 fuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this);
7346 return NewMI;
7347 }
7348 }
7349 break;
7350 case X86::MOV32r0:
7351 if (auto *NewMI =
7352 makeM0Inst(*this, (Size == 4) ? X86::MOV32mi : X86::MOV64mi32, MOs,
7353 InsertPt, MI))
7354 return NewMI;
7355 break;
7356 }
7357
7358 return nullptr;
7359}
7360
7361 static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF,
7362 MachineInstr &MI) {
7363 if (!hasUndefRegUpdate(MI.getOpcode(), 1, /*ForLoadFold*/ true) ||
7364 !MI.getOperand(1).isReg())
7365 return false;
7366
7367 // There are two cases we need to handle depending on where in the pipeline
7368 // the folding attempt is being made.
7369 // -Register has the undef flag set.
7370 // -Register is produced by the IMPLICIT_DEF instruction.
7371
7372 if (MI.getOperand(1).isUndef())
7373 return true;
7374
7375 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7376 MachineInstr *VRegDef = RegInfo.getUniqueVRegDef(MI.getOperand(1).getReg());
7377 return VRegDef && VRegDef->isImplicitDef();
7378}
7379
7380unsigned X86InstrInfo::commuteOperandsForFold(MachineInstr &MI,
7381 unsigned Idx1) const {
7382 unsigned Idx2 = CommuteAnyOperandIndex;
7383 if (!findCommutedOpIndices(MI, Idx1, Idx2))
7384 return Idx1;
7385
7386 bool HasDef = MI.getDesc().getNumDefs();
7387 Register Reg0 = HasDef ? MI.getOperand(0).getReg() : Register();
7388 Register Reg1 = MI.getOperand(Idx1).getReg();
7389 Register Reg2 = MI.getOperand(Idx2).getReg();
7390 bool Tied1 = 0 == MI.getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO);
7391 bool Tied2 = 0 == MI.getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO);
7392
7393 // If either of the commutable operands are tied to the destination
7394 // then we can not commute + fold.
7395 if ((HasDef && Reg0 == Reg1 && Tied1) || (HasDef && Reg0 == Reg2 && Tied2))
7396 return Idx1;
7397
7398 return commuteInstruction(MI, false, Idx1, Idx2) ? Idx2 : Idx1;
7399}
7400
7401static void printFailMsgforFold(const MachineInstr &MI, unsigned Idx) {
7402 if (PrintFailedFusing && !MI.isCopy())
7403 dbgs() << "We failed to fuse operand " << Idx << " in " << MI;
7404}
7405
7406 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
7407 MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
7408 ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
7409 unsigned Size, Align Alignment, bool AllowCommute) const {
7410 bool isSlowTwoMemOps = Subtarget.slowTwoMemOps();
7411 unsigned Opc = MI.getOpcode();
7412
7413 // For CPUs that favor the register form of a call or push,
7414 // do not fold loads into calls or pushes, unless optimizing for size
7415 // aggressively.
7416 if (isSlowTwoMemOps && !MF.getFunction().hasMinSize() &&
7417 (Opc == X86::CALL32r || Opc == X86::CALL64r || Opc == X86::PUSH16r ||
7418 Opc == X86::PUSH32r || Opc == X86::PUSH64r))
7419 return nullptr;
7420
7421 // Avoid partial and undef register update stalls unless optimizing for size.
7422 if (!MF.getFunction().hasOptSize() &&
7423 (hasPartialRegUpdate(Opc, Subtarget, /*ForLoadFold*/ true) ||
7424 shouldPreventUndefRegUpdateMemFold(MF, MI)))
7425 return nullptr;
7426
7427 unsigned NumOps = MI.getDesc().getNumOperands();
7428 bool IsTwoAddr = NumOps > 1 && OpNum < 2 && MI.getOperand(0).isReg() &&
7429 MI.getOperand(1).isReg() &&
7430 MI.getOperand(0).getReg() == MI.getOperand(1).getReg();
7431
7432 // FIXME: AsmPrinter doesn't know how to handle
7433 // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
7434 if (Opc == X86::ADD32ri &&
7435 MI.getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
7436 return nullptr;
7437
7438 // GOTTPOFF relocation loads can only be folded into add instructions.
7439 // FIXME: Need to exclude other relocations that only support specific
7440 // instructions.
7441 if (MOs.size() == X86::AddrNumOperands &&
7442 MOs[X86::AddrDisp].getTargetFlags() == X86II::MO_GOTTPOFF &&
7443 Opc != X86::ADD64rr)
7444 return nullptr;
7445
7446 // Don't fold loads into indirect calls that need a KCFI check as we'll
7447 // have to unfold these in X86TargetLowering::EmitKCFICheck anyway.
7448 if (MI.isCall() && MI.getCFIType())
7449 return nullptr;
7450
7451 // Attempt to fold any custom cases we have.
7452 if (auto *CustomMI = foldMemoryOperandCustom(MF, MI, OpNum, MOs, InsertPt,
7453 Size, Alignment))
7454 return CustomMI;
7455
7456 // Folding a memory location into the two-address part of a two-address
7457 // instruction is different than folding it other places. It requires
7458 // replacing the *two* registers with the memory location.
7459 //
7460 // Utilize the mapping NonNDD -> RMW for the NDD variant.
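// Illustrative example (not from the upstream source): folding the tied
// operand of ADD32rr %r, %r, %s against a spill slot goes through the
// two-address table and yields ADD32mr <fi>, %s, i.e. a read-modify-write on
// the stack slot rather than a plain load fold.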
7461 unsigned NonNDOpc = Subtarget.hasNDD() ? X86::getNonNDVariant(Opc) : 0U;
7462 const X86FoldTableEntry *I =
7463 IsTwoAddr ? lookupTwoAddrFoldTable(NonNDOpc ? NonNDOpc : Opc)
7464 : lookupFoldTable(Opc, OpNum);
7465
7466 MachineInstr *NewMI = nullptr;
7467 if (I) {
7468 unsigned Opcode = I->DstOp;
7469 if (Alignment <
7470 Align(1ULL << ((I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT)))
7471 return nullptr;
7472 bool NarrowToMOV32rm = false;
7473 if (Size) {
7474 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
7475 const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
7476 unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
7477 // Check if it's safe to fold the load. If the size of the object is
7478 // narrower than the load width, then it's not.
7479 // FIXME: Allow scalar intrinsic instructions like ADDSSrm_Int.
7480 if ((I->Flags & TB_FOLDED_LOAD) && Size < RCSize) {
7481 // If this is a 64-bit load, but the spill slot is 32, then we can do
7482 // a 32-bit load which is implicitly zero-extended. This likely is
7483 // due to live interval analysis remat'ing a load from stack slot.
7484 if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
7485 return nullptr;
7486 if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
7487 return nullptr;
7488 Opcode = X86::MOV32rm;
7489 NarrowToMOV32rm = true;
7490 }
7491 // For stores, make sure the size of the object is equal to the size of
7492 // the store. If the object is larger, the extra bits would be garbage. If
7493 // the object is smaller we might overwrite another object or fault.
7494 if ((I->Flags & TB_FOLDED_STORE) && Size != RCSize)
7495 return nullptr;
7496 }
7497
7498 NewMI = IsTwoAddr ? fuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this)
7499 : fuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this);
7500
7501 if (NarrowToMOV32rm) {
7502 // This is the special case where we use a MOV32rm to load a 32-bit
7503 // value and zero-extend the top bits. Change the destination register
7504 // to a 32-bit one.
7505 Register DstReg = NewMI->getOperand(0).getReg();
7506 if (DstReg.isPhysical())
7507 NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
7508 else
7509 NewMI->getOperand(0).setSubReg(X86::sub_32bit);
7510 }
7511 return NewMI;
7512 }
7513
7514 if (AllowCommute) {
7515 // If the instruction and target operand are commutable, commute the
7516 // instruction and try again.
7517 unsigned CommuteOpIdx2 = commuteOperandsForFold(MI, OpNum);
7518 if (CommuteOpIdx2 == OpNum) {
7519 printFailMsgforFold(MI, OpNum);
7520 return nullptr;
7521 }
7522 // Attempt to fold with the commuted version of the instruction.
7523 NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt, Size,
7524 Alignment, /*AllowCommute=*/false);
7525 if (NewMI)
7526 return NewMI;
7527 // Folding failed again - undo the commute before returning.
7528 commuteInstruction(MI, false, OpNum, CommuteOpIdx2);
7529 }
7530
7531 printFailMsgforFold(MI, OpNum);
7532 return nullptr;
7533}
7534
7535 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
7536 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
7537 MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
7538 VirtRegMap *VRM) const {
7539 // Check switch flag
7540 if (NoFusing)
7541 return nullptr;
7542
7543 // Avoid partial and undef register update stalls unless optimizing for size.
7544 if (!MF.getFunction().hasOptSize() &&
7545 (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/ true) ||
7546 shouldPreventUndefRegUpdateMemFold(MF, MI)))
7547 return nullptr;
7548
7549 // Don't fold subreg spills, or reloads that use a high subreg.
7550 for (auto Op : Ops) {
7551 MachineOperand &MO = MI.getOperand(Op);
7552 auto SubReg = MO.getSubReg();
7553 // MOV32r0 is special b/c it's used to clear a 64-bit register too.
7554 // (See patterns for MOV32r0 in TD files).
7555 if (MI.getOpcode() == X86::MOV32r0 && SubReg == X86::sub_32bit)
7556 continue;
7557 if (SubReg && (MO.isDef() || SubReg == X86::sub_8bit_hi))
7558 return nullptr;
7559 }
7560
7561 const MachineFrameInfo &MFI = MF.getFrameInfo();
7562 unsigned Size = MFI.getObjectSize(FrameIndex);
7563 Align Alignment = MFI.getObjectAlign(FrameIndex);
7564 // If the function stack isn't realigned we don't want to fold instructions
7565 // that need increased alignment.
7566 if (!RI.hasStackRealignment(MF))
7567 Alignment =
7568 std::min(Alignment, Subtarget.getFrameLowering()->getStackAlign());
7569
7570 auto Impl = [&]() {
7571 return foldMemoryOperandImpl(MF, MI, Ops[0],
7572 MachineOperand::CreateFI(FrameIndex), InsertPt,
7573 Size, Alignment, /*AllowCommute=*/true);
7574 };
7575 if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
7576 unsigned NewOpc = 0;
7577 unsigned RCSize = 0;
7578 unsigned Opc = MI.getOpcode();
7579 switch (Opc) {
7580 default:
7581 // NDD can be folded into RMW though its Op0 and Op1 are not tied.
7582 return (Subtarget.hasNDD() ? X86::getNonNDVariant(Opc) : 0U) ? Impl()
7583 : nullptr;
7584 case X86::TEST8rr:
7585 NewOpc = X86::CMP8ri;
7586 RCSize = 1;
7587 break;
7588 case X86::TEST16rr:
7589 NewOpc = X86::CMP16ri;
7590 RCSize = 2;
7591 break;
7592 case X86::TEST32rr:
7593 NewOpc = X86::CMP32ri;
7594 RCSize = 4;
7595 break;
7596 case X86::TEST64rr:
7597 NewOpc = X86::CMP64ri32;
7598 RCSize = 8;
7599 break;
7600 }
7601 // Check if it's safe to fold the load. If the size of the object is
7602 // narrower than the load width, then it's not.
7603 if (Size < RCSize)
7604 return nullptr;
7605 // Change to CMPXXri r, 0 first.
7606 MI.setDesc(get(NewOpc));
7607 MI.getOperand(1).ChangeToImmediate(0);
7608 } else if (Ops.size() != 1)
7609 return nullptr;
7610
7611 return Impl();
7612}
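// Worked example (illustrative, not from the upstream source): when both
// operands of TEST32rr %r, %r are folded against a 4-byte spill slot, the
// code above first rewrites the instruction to CMP32ri %r, 0 and then folds
// the reload, producing a single memory-immediate compare CMP32mi <fi>, 0.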
7613
7614/// Check if \p LoadMI is a partial register load that we can't fold into \p MI
7615/// because the latter uses contents that wouldn't be defined in the folded
7616/// version. For instance, this transformation isn't legal:
7617/// movss (%rdi), %xmm0
7618/// addps %xmm0, %xmm0
7619/// ->
7620/// addps (%rdi), %xmm0
7621///
7622/// But this one is:
7623/// movss (%rdi), %xmm0
7624/// addss %xmm0, %xmm0
7625/// ->
7626/// addss (%rdi), %xmm0
7627///
7628 static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
7629 const MachineInstr &UserMI,
7630 const MachineFunction &MF) {
7631 unsigned Opc = LoadMI.getOpcode();
7632 unsigned UserOpc = UserMI.getOpcode();
7633 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
7634 const TargetRegisterClass *RC =
7635 MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg());
7636 unsigned RegSize = TRI.getRegSizeInBits(*RC);
7637
7638 if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm ||
7639 Opc == X86::MOVSSrm_alt || Opc == X86::VMOVSSrm_alt ||
7640 Opc == X86::VMOVSSZrm_alt) &&
7641 RegSize > 32) {
7642 // These instructions only load 32 bits, so we can't fold them if the
7643 // destination register is wider than 32 bits (4 bytes) and the user
7644 // instruction isn't a scalar (SS) operation.
7645 switch (UserOpc) {
7646 case X86::CVTSS2SDrr_Int:
7647 case X86::VCVTSS2SDrr_Int:
7648 case X86::VCVTSS2SDZrr_Int:
7649 case X86::VCVTSS2SDZrr_Intk:
7650 case X86::VCVTSS2SDZrr_Intkz:
7651 case X86::CVTSS2SIrr_Int:
7652 case X86::CVTSS2SI64rr_Int:
7653 case X86::VCVTSS2SIrr_Int:
7654 case X86::VCVTSS2SI64rr_Int:
7655 case X86::VCVTSS2SIZrr_Int:
7656 case X86::VCVTSS2SI64Zrr_Int:
7657 case X86::CVTTSS2SIrr_Int:
7658 case X86::CVTTSS2SI64rr_Int:
7659 case X86::VCVTTSS2SIrr_Int:
7660 case X86::VCVTTSS2SI64rr_Int:
7661 case X86::VCVTTSS2SIZrr_Int:
7662 case X86::VCVTTSS2SI64Zrr_Int:
7663 case X86::VCVTSS2USIZrr_Int:
7664 case X86::VCVTSS2USI64Zrr_Int:
7665 case X86::VCVTTSS2USIZrr_Int:
7666 case X86::VCVTTSS2USI64Zrr_Int:
7667 case X86::RCPSSr_Int:
7668 case X86::VRCPSSr_Int:
7669 case X86::RSQRTSSr_Int:
7670 case X86::VRSQRTSSr_Int:
7671 case X86::ROUNDSSri_Int:
7672 case X86::VROUNDSSri_Int:
7673 case X86::COMISSrr_Int:
7674 case X86::VCOMISSrr_Int:
7675 case X86::VCOMISSZrr_Int:
7676 case X86::UCOMISSrr_Int:
7677 case X86::VUCOMISSrr_Int:
7678 case X86::VUCOMISSZrr_Int:
7679 case X86::ADDSSrr_Int:
7680 case X86::VADDSSrr_Int:
7681 case X86::VADDSSZrr_Int:
7682 case X86::CMPSSrri_Int:
7683 case X86::VCMPSSrri_Int:
7684 case X86::VCMPSSZrri_Int:
7685 case X86::DIVSSrr_Int:
7686 case X86::VDIVSSrr_Int:
7687 case X86::VDIVSSZrr_Int:
7688 case X86::MAXSSrr_Int:
7689 case X86::VMAXSSrr_Int:
7690 case X86::VMAXSSZrr_Int:
7691 case X86::MINSSrr_Int:
7692 case X86::VMINSSrr_Int:
7693 case X86::VMINSSZrr_Int:
7694 case X86::MULSSrr_Int:
7695 case X86::VMULSSrr_Int:
7696 case X86::VMULSSZrr_Int:
7697 case X86::SQRTSSr_Int:
7698 case X86::VSQRTSSr_Int:
7699 case X86::VSQRTSSZr_Int:
7700 case X86::SUBSSrr_Int:
7701 case X86::VSUBSSrr_Int:
7702 case X86::VSUBSSZrr_Int:
7703 case X86::VADDSSZrr_Intk:
7704 case X86::VADDSSZrr_Intkz:
7705 case X86::VCMPSSZrri_Intk:
7706 case X86::VDIVSSZrr_Intk:
7707 case X86::VDIVSSZrr_Intkz:
7708 case X86::VMAXSSZrr_Intk:
7709 case X86::VMAXSSZrr_Intkz:
7710 case X86::VMINSSZrr_Intk:
7711 case X86::VMINSSZrr_Intkz:
7712 case X86::VMULSSZrr_Intk:
7713 case X86::VMULSSZrr_Intkz:
7714 case X86::VSQRTSSZr_Intk:
7715 case X86::VSQRTSSZr_Intkz:
7716 case X86::VSUBSSZrr_Intk:
7717 case X86::VSUBSSZrr_Intkz:
7718 case X86::VFMADDSS4rr_Int:
7719 case X86::VFNMADDSS4rr_Int:
7720 case X86::VFMSUBSS4rr_Int:
7721 case X86::VFNMSUBSS4rr_Int:
7722 case X86::VFMADD132SSr_Int:
7723 case X86::VFNMADD132SSr_Int:
7724 case X86::VFMADD213SSr_Int:
7725 case X86::VFNMADD213SSr_Int:
7726 case X86::VFMADD231SSr_Int:
7727 case X86::VFNMADD231SSr_Int:
7728 case X86::VFMSUB132SSr_Int:
7729 case X86::VFNMSUB132SSr_Int:
7730 case X86::VFMSUB213SSr_Int:
7731 case X86::VFNMSUB213SSr_Int:
7732 case X86::VFMSUB231SSr_Int:
7733 case X86::VFNMSUB231SSr_Int:
7734 case X86::VFMADD132SSZr_Int:
7735 case X86::VFNMADD132SSZr_Int:
7736 case X86::VFMADD213SSZr_Int:
7737 case X86::VFNMADD213SSZr_Int:
7738 case X86::VFMADD231SSZr_Int:
7739 case X86::VFNMADD231SSZr_Int:
7740 case X86::VFMSUB132SSZr_Int:
7741 case X86::VFNMSUB132SSZr_Int:
7742 case X86::VFMSUB213SSZr_Int:
7743 case X86::VFNMSUB213SSZr_Int:
7744 case X86::VFMSUB231SSZr_Int:
7745 case X86::VFNMSUB231SSZr_Int:
7746 case X86::VFMADD132SSZr_Intk:
7747 case X86::VFNMADD132SSZr_Intk:
7748 case X86::VFMADD213SSZr_Intk:
7749 case X86::VFNMADD213SSZr_Intk:
7750 case X86::VFMADD231SSZr_Intk:
7751 case X86::VFNMADD231SSZr_Intk:
7752 case X86::VFMSUB132SSZr_Intk:
7753 case X86::VFNMSUB132SSZr_Intk:
7754 case X86::VFMSUB213SSZr_Intk:
7755 case X86::VFNMSUB213SSZr_Intk:
7756 case X86::VFMSUB231SSZr_Intk:
7757 case X86::VFNMSUB231SSZr_Intk:
7758 case X86::VFMADD132SSZr_Intkz:
7759 case X86::VFNMADD132SSZr_Intkz:
7760 case X86::VFMADD213SSZr_Intkz:
7761 case X86::VFNMADD213SSZr_Intkz:
7762 case X86::VFMADD231SSZr_Intkz:
7763 case X86::VFNMADD231SSZr_Intkz:
7764 case X86::VFMSUB132SSZr_Intkz:
7765 case X86::VFNMSUB132SSZr_Intkz:
7766 case X86::VFMSUB213SSZr_Intkz:
7767 case X86::VFNMSUB213SSZr_Intkz:
7768 case X86::VFMSUB231SSZr_Intkz:
7769 case X86::VFNMSUB231SSZr_Intkz:
7770 case X86::VFIXUPIMMSSZrri:
7771 case X86::VFIXUPIMMSSZrrik:
7772 case X86::VFIXUPIMMSSZrrikz:
7773 case X86::VFPCLASSSSZri:
7774 case X86::VFPCLASSSSZrik:
7775 case X86::VGETEXPSSZr:
7776 case X86::VGETEXPSSZrk:
7777 case X86::VGETEXPSSZrkz:
7778 case X86::VGETMANTSSZrri:
7779 case X86::VGETMANTSSZrrik:
7780 case X86::VGETMANTSSZrrikz:
7781 case X86::VRANGESSZrri:
7782 case X86::VRANGESSZrrik:
7783 case X86::VRANGESSZrrikz:
7784 case X86::VRCP14SSZrr:
7785 case X86::VRCP14SSZrrk:
7786 case X86::VRCP14SSZrrkz:
7787 case X86::VRCP28SSZr:
7788 case X86::VRCP28SSZrk:
7789 case X86::VRCP28SSZrkz:
7790 case X86::VREDUCESSZrri:
7791 case X86::VREDUCESSZrrik:
7792 case X86::VREDUCESSZrrikz:
7793 case X86::VRNDSCALESSZrri_Int:
7794 case X86::VRNDSCALESSZrri_Intk:
7795 case X86::VRNDSCALESSZrri_Intkz:
7796 case X86::VRSQRT14SSZrr:
7797 case X86::VRSQRT14SSZrrk:
7798 case X86::VRSQRT14SSZrrkz:
7799 case X86::VRSQRT28SSZr:
7800 case X86::VRSQRT28SSZrk:
7801 case X86::VRSQRT28SSZrkz:
7802 case X86::VSCALEFSSZrr:
7803 case X86::VSCALEFSSZrrk:
7804 case X86::VSCALEFSSZrrkz:
7805 return false;
7806 default:
7807 return true;
7808 }
7809 }
7810
7811 if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm ||
7812 Opc == X86::MOVSDrm_alt || Opc == X86::VMOVSDrm_alt ||
7813 Opc == X86::VMOVSDZrm_alt) &&
7814 RegSize > 64) {
7815 // These instructions only load 64 bits, so we can't fold them if the
7816 // destination register is wider than 64 bits (8 bytes) and the user
7817 // instruction isn't a scalar (SD) operation.
7818 switch (UserOpc) {
7819 case X86::CVTSD2SSrr_Int:
7820 case X86::VCVTSD2SSrr_Int:
7821 case X86::VCVTSD2SSZrr_Int:
7822 case X86::VCVTSD2SSZrr_Intk:
7823 case X86::VCVTSD2SSZrr_Intkz:
7824 case X86::CVTSD2SIrr_Int:
7825 case X86::CVTSD2SI64rr_Int:
7826 case X86::VCVTSD2SIrr_Int:
7827 case X86::VCVTSD2SI64rr_Int:
7828 case X86::VCVTSD2SIZrr_Int:
7829 case X86::VCVTSD2SI64Zrr_Int:
7830 case X86::CVTTSD2SIrr_Int:
7831 case X86::CVTTSD2SI64rr_Int:
7832 case X86::VCVTTSD2SIrr_Int:
7833 case X86::VCVTTSD2SI64rr_Int:
7834 case X86::VCVTTSD2SIZrr_Int:
7835 case X86::VCVTTSD2SI64Zrr_Int:
7836 case X86::VCVTSD2USIZrr_Int:
7837 case X86::VCVTSD2USI64Zrr_Int:
7838 case X86::VCVTTSD2USIZrr_Int:
7839 case X86::VCVTTSD2USI64Zrr_Int:
7840 case X86::ROUNDSDri_Int:
7841 case X86::VROUNDSDri_Int:
7842 case X86::COMISDrr_Int:
7843 case X86::VCOMISDrr_Int:
7844 case X86::VCOMISDZrr_Int:
7845 case X86::UCOMISDrr_Int:
7846 case X86::VUCOMISDrr_Int:
7847 case X86::VUCOMISDZrr_Int:
7848 case X86::ADDSDrr_Int:
7849 case X86::VADDSDrr_Int:
7850 case X86::VADDSDZrr_Int:
7851 case X86::CMPSDrri_Int:
7852 case X86::VCMPSDrri_Int:
7853 case X86::VCMPSDZrri_Int:
7854 case X86::DIVSDrr_Int:
7855 case X86::VDIVSDrr_Int:
7856 case X86::VDIVSDZrr_Int:
7857 case X86::MAXSDrr_Int:
7858 case X86::VMAXSDrr_Int:
7859 case X86::VMAXSDZrr_Int:
7860 case X86::MINSDrr_Int:
7861 case X86::VMINSDrr_Int:
7862 case X86::VMINSDZrr_Int:
7863 case X86::MULSDrr_Int:
7864 case X86::VMULSDrr_Int:
7865 case X86::VMULSDZrr_Int:
7866 case X86::SQRTSDr_Int:
7867 case X86::VSQRTSDr_Int:
7868 case X86::VSQRTSDZr_Int:
7869 case X86::SUBSDrr_Int:
7870 case X86::VSUBSDrr_Int:
7871 case X86::VSUBSDZrr_Int:
7872 case X86::VADDSDZrr_Intk:
7873 case X86::VADDSDZrr_Intkz:
7874 case X86::VCMPSDZrri_Intk:
7875 case X86::VDIVSDZrr_Intk:
7876 case X86::VDIVSDZrr_Intkz:
7877 case X86::VMAXSDZrr_Intk:
7878 case X86::VMAXSDZrr_Intkz:
7879 case X86::VMINSDZrr_Intk:
7880 case X86::VMINSDZrr_Intkz:
7881 case X86::VMULSDZrr_Intk:
7882 case X86::VMULSDZrr_Intkz:
7883 case X86::VSQRTSDZr_Intk:
7884 case X86::VSQRTSDZr_Intkz:
7885 case X86::VSUBSDZrr_Intk:
7886 case X86::VSUBSDZrr_Intkz:
7887 case X86::VFMADDSD4rr_Int:
7888 case X86::VFNMADDSD4rr_Int:
7889 case X86::VFMSUBSD4rr_Int:
7890 case X86::VFNMSUBSD4rr_Int:
7891 case X86::VFMADD132SDr_Int:
7892 case X86::VFNMADD132SDr_Int:
7893 case X86::VFMADD213SDr_Int:
7894 case X86::VFNMADD213SDr_Int:
7895 case X86::VFMADD231SDr_Int:
7896 case X86::VFNMADD231SDr_Int:
7897 case X86::VFMSUB132SDr_Int:
7898 case X86::VFNMSUB132SDr_Int:
7899 case X86::VFMSUB213SDr_Int:
7900 case X86::VFNMSUB213SDr_Int:
7901 case X86::VFMSUB231SDr_Int:
7902 case X86::VFNMSUB231SDr_Int:
7903 case X86::VFMADD132SDZr_Int:
7904 case X86::VFNMADD132SDZr_Int:
7905 case X86::VFMADD213SDZr_Int:
7906 case X86::VFNMADD213SDZr_Int:
7907 case X86::VFMADD231SDZr_Int:
7908 case X86::VFNMADD231SDZr_Int:
7909 case X86::VFMSUB132SDZr_Int:
7910 case X86::VFNMSUB132SDZr_Int:
7911 case X86::VFMSUB213SDZr_Int:
7912 case X86::VFNMSUB213SDZr_Int:
7913 case X86::VFMSUB231SDZr_Int:
7914 case X86::VFNMSUB231SDZr_Int:
7915 case X86::VFMADD132SDZr_Intk:
7916 case X86::VFNMADD132SDZr_Intk:
7917 case X86::VFMADD213SDZr_Intk:
7918 case X86::VFNMADD213SDZr_Intk:
7919 case X86::VFMADD231SDZr_Intk:
7920 case X86::VFNMADD231SDZr_Intk:
7921 case X86::VFMSUB132SDZr_Intk:
7922 case X86::VFNMSUB132SDZr_Intk:
7923 case X86::VFMSUB213SDZr_Intk:
7924 case X86::VFNMSUB213SDZr_Intk:
7925 case X86::VFMSUB231SDZr_Intk:
7926 case X86::VFNMSUB231SDZr_Intk:
7927 case X86::VFMADD132SDZr_Intkz:
7928 case X86::VFNMADD132SDZr_Intkz:
7929 case X86::VFMADD213SDZr_Intkz:
7930 case X86::VFNMADD213SDZr_Intkz:
7931 case X86::VFMADD231SDZr_Intkz:
7932 case X86::VFNMADD231SDZr_Intkz:
7933 case X86::VFMSUB132SDZr_Intkz:
7934 case X86::VFNMSUB132SDZr_Intkz:
7935 case X86::VFMSUB213SDZr_Intkz:
7936 case X86::VFNMSUB213SDZr_Intkz:
7937 case X86::VFMSUB231SDZr_Intkz:
7938 case X86::VFNMSUB231SDZr_Intkz:
7939 case X86::VFIXUPIMMSDZrri:
7940 case X86::VFIXUPIMMSDZrrik:
7941 case X86::VFIXUPIMMSDZrrikz:
7942 case X86::VFPCLASSSDZri:
7943 case X86::VFPCLASSSDZrik:
7944 case X86::VGETEXPSDZr:
7945 case X86::VGETEXPSDZrk:
7946 case X86::VGETEXPSDZrkz:
7947 case X86::VGETMANTSDZrri:
7948 case X86::VGETMANTSDZrrik:
7949 case X86::VGETMANTSDZrrikz:
7950 case X86::VRANGESDZrri:
7951 case X86::VRANGESDZrrik:
7952 case X86::VRANGESDZrrikz:
7953 case X86::VRCP14SDZrr:
7954 case X86::VRCP14SDZrrk:
7955 case X86::VRCP14SDZrrkz:
7956 case X86::VRCP28SDZr:
7957 case X86::VRCP28SDZrk:
7958 case X86::VRCP28SDZrkz:
7959 case X86::VREDUCESDZrri:
7960 case X86::VREDUCESDZrrik:
7961 case X86::VREDUCESDZrrikz:
7962 case X86::VRNDSCALESDZrri_Int:
7963 case X86::VRNDSCALESDZrri_Intk:
7964 case X86::VRNDSCALESDZrri_Intkz:
7965 case X86::VRSQRT14SDZrr:
7966 case X86::VRSQRT14SDZrrk:
7967 case X86::VRSQRT14SDZrrkz:
7968 case X86::VRSQRT28SDZr:
7969 case X86::VRSQRT28SDZrk:
7970 case X86::VRSQRT28SDZrkz:
7971 case X86::VSCALEFSDZrr:
7972 case X86::VSCALEFSDZrrk:
7973 case X86::VSCALEFSDZrrkz:
7974 return false;
7975 default:
7976 return true;
7977 }
7978 }
7979
7980 if ((Opc == X86::VMOVSHZrm || Opc == X86::VMOVSHZrm_alt) && RegSize > 16) {
7981 // These instructions only load 16 bits, so we can't fold them if the
7982 // destination register is wider than 16 bits (2 bytes) and the user
7983 // instruction isn't a scalar (SH) operation.
7984 switch (UserOpc) {
7985 case X86::VADDSHZrr_Int:
7986 case X86::VCMPSHZrri_Int:
7987 case X86::VDIVSHZrr_Int:
7988 case X86::VMAXSHZrr_Int:
7989 case X86::VMINSHZrr_Int:
7990 case X86::VMULSHZrr_Int:
7991 case X86::VSUBSHZrr_Int:
7992 case X86::VADDSHZrr_Intk:
7993 case X86::VADDSHZrr_Intkz:
7994 case X86::VCMPSHZrri_Intk:
7995 case X86::VDIVSHZrr_Intk:
7996 case X86::VDIVSHZrr_Intkz:
7997 case X86::VMAXSHZrr_Intk:
7998 case X86::VMAXSHZrr_Intkz:
7999 case X86::VMINSHZrr_Intk:
8000 case X86::VMINSHZrr_Intkz:
8001 case X86::VMULSHZrr_Intk:
8002 case X86::VMULSHZrr_Intkz:
8003 case X86::VSUBSHZrr_Intk:
8004 case X86::VSUBSHZrr_Intkz:
8005 case X86::VFMADD132SHZr_Int:
8006 case X86::VFNMADD132SHZr_Int:
8007 case X86::VFMADD213SHZr_Int:
8008 case X86::VFNMADD213SHZr_Int:
8009 case X86::VFMADD231SHZr_Int:
8010 case X86::VFNMADD231SHZr_Int:
8011 case X86::VFMSUB132SHZr_Int:
8012 case X86::VFNMSUB132SHZr_Int:
8013 case X86::VFMSUB213SHZr_Int:
8014 case X86::VFNMSUB213SHZr_Int:
8015 case X86::VFMSUB231SHZr_Int:
8016 case X86::VFNMSUB231SHZr_Int:
8017 case X86::VFMADD132SHZr_Intk:
8018 case X86::VFNMADD132SHZr_Intk:
8019 case X86::VFMADD213SHZr_Intk:
8020 case X86::VFNMADD213SHZr_Intk:
8021 case X86::VFMADD231SHZr_Intk:
8022 case X86::VFNMADD231SHZr_Intk:
8023 case X86::VFMSUB132SHZr_Intk:
8024 case X86::VFNMSUB132SHZr_Intk:
8025 case X86::VFMSUB213SHZr_Intk:
8026 case X86::VFNMSUB213SHZr_Intk:
8027 case X86::VFMSUB231SHZr_Intk:
8028 case X86::VFNMSUB231SHZr_Intk:
8029 case X86::VFMADD132SHZr_Intkz:
8030 case X86::VFNMADD132SHZr_Intkz:
8031 case X86::VFMADD213SHZr_Intkz:
8032 case X86::VFNMADD213SHZr_Intkz:
8033 case X86::VFMADD231SHZr_Intkz:
8034 case X86::VFNMADD231SHZr_Intkz:
8035 case X86::VFMSUB132SHZr_Intkz:
8036 case X86::VFNMSUB132SHZr_Intkz:
8037 case X86::VFMSUB213SHZr_Intkz:
8038 case X86::VFNMSUB213SHZr_Intkz:
8039 case X86::VFMSUB231SHZr_Intkz:
8040 case X86::VFNMSUB231SHZr_Intkz:
8041 return false;
8042 default:
8043 return true;
8044 }
8045 }
8046
8047 return false;
8048}
8049
8050 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
8051 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
8052 MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
8053 LiveIntervals *LIS) const {
8054
8055 // TODO: Support the case where LoadMI loads a wide register, but MI
8056 // only uses a subreg.
8057 for (auto Op : Ops) {
8058 if (MI.getOperand(Op).getSubReg())
8059 return nullptr;
8060 }
8061
8062 // If loading from a FrameIndex, fold directly from the FrameIndex.
8063 unsigned NumOps = LoadMI.getDesc().getNumOperands();
8064 int FrameIndex;
8065 if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
8066 if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF))
8067 return nullptr;
8068 return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex, LIS);
8069 }
8070
8071 // Check switch flag
8072 if (NoFusing)
8073 return nullptr;
8074
8075 // Avoid partial and undef register update stalls unless optimizing for size.
8076 if (!MF.getFunction().hasOptSize() &&
8077 (hasPartialRegUpdate(MI.getOpcode(), Subtarget, /*ForLoadFold*/ true) ||
8078 shouldPreventUndefRegUpdateMemFold(MF, MI)))
8079 return nullptr;
8080
8081 // Determine the alignment of the load.
8082 Align Alignment;
8083 unsigned LoadOpc = LoadMI.getOpcode();
8084 if (LoadMI.hasOneMemOperand())
8085 Alignment = (*LoadMI.memoperands_begin())->getAlign();
8086 else
8087 switch (LoadOpc) {
8088 case X86::AVX512_512_SET0:
8089 case X86::AVX512_512_SETALLONES:
8090 Alignment = Align(64);
8091 break;
8092 case X86::AVX2_SETALLONES:
8093 case X86::AVX1_SETALLONES:
8094 case X86::AVX_SET0:
8095 case X86::AVX512_256_SET0:
8096 Alignment = Align(32);
8097 break;
8098 case X86::V_SET0:
8099 case X86::V_SETALLONES:
8100 case X86::AVX512_128_SET0:
8101 case X86::FsFLD0F128:
8102 case X86::AVX512_FsFLD0F128:
8103 Alignment = Align(16);
8104 break;
8105 case X86::MMX_SET0:
8106 case X86::FsFLD0SD:
8107 case X86::AVX512_FsFLD0SD:
8108 Alignment = Align(8);
8109 break;
8110 case X86::FsFLD0SS:
8111 case X86::AVX512_FsFLD0SS:
8112 Alignment = Align(4);
8113 break;
8114 case X86::FsFLD0SH:
8115 case X86::AVX512_FsFLD0SH:
8116 Alignment = Align(2);
8117 break;
8118 default:
8119 return nullptr;
8120 }
8121 if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
8122 unsigned NewOpc = 0;
8123 switch (MI.getOpcode()) {
8124 default:
8125 return nullptr;
8126 case X86::TEST8rr:
8127 NewOpc = X86::CMP8ri;
8128 break;
8129 case X86::TEST16rr:
8130 NewOpc = X86::CMP16ri;
8131 break;
8132 case X86::TEST32rr:
8133 NewOpc = X86::CMP32ri;
8134 break;
8135 case X86::TEST64rr:
8136 NewOpc = X86::CMP64ri32;
8137 break;
8138 }
8139 // Change to CMPXXri r, 0 first.
8140 MI.setDesc(get(NewOpc));
8141 MI.getOperand(1).ChangeToImmediate(0);
8142 } else if (Ops.size() != 1)
8143 return nullptr;
8144
8145 // Make sure the subregisters match.
8146 // Otherwise we risk changing the size of the load.
8147 if (LoadMI.getOperand(0).getSubReg() != MI.getOperand(Ops[0]).getSubReg())
8148 return nullptr;
8149
8150 SmallVector<MachineOperand, X86::AddrNumOperands> MOs;
8151 switch (LoadOpc) {
8152 case X86::MMX_SET0:
8153 case X86::V_SET0:
8154 case X86::V_SETALLONES:
8155 case X86::AVX2_SETALLONES:
8156 case X86::AVX1_SETALLONES:
8157 case X86::AVX_SET0:
8158 case X86::AVX512_128_SET0:
8159 case X86::AVX512_256_SET0:
8160 case X86::AVX512_512_SET0:
8161 case X86::AVX512_512_SETALLONES:
8162 case X86::FsFLD0SH:
8163 case X86::AVX512_FsFLD0SH:
8164 case X86::FsFLD0SD:
8165 case X86::AVX512_FsFLD0SD:
8166 case X86::FsFLD0SS:
8167 case X86::AVX512_FsFLD0SS:
8168 case X86::FsFLD0F128:
8169 case X86::AVX512_FsFLD0F128: {
8170 // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
8171 // Create a constant-pool entry and operands to load from it.
8172
8173 // Large code model can't fold loads this way.
8174 if (MF.getTarget().getCodeModel() == CodeModel::Large)
8175 return nullptr;
8176
8177 // x86-32 PIC requires a PIC base register for constant pools.
8178 unsigned PICBase = 0;
8179 // Since we're using Small or Kernel code model, we can always use
8180 // RIP-relative addressing for a smaller encoding.
8181 if (Subtarget.is64Bit()) {
8182 PICBase = X86::RIP;
8183 } else if (MF.getTarget().isPositionIndependent()) {
8184 // FIXME: PICBase = getGlobalBaseReg(&MF);
8185 // This doesn't work for several reasons.
8186 // 1. GlobalBaseReg may have been spilled.
8187 // 2. It may not be live at MI.
8188 return nullptr;
8189 }
8190
8191 // Create a constant-pool entry.
8192 MachineConstantPool &MCP = *MF.getConstantPool();
8193 Type *Ty;
8194 bool IsAllOnes = false;
8195 switch (LoadOpc) {
8196 case X86::FsFLD0SS:
8197 case X86::AVX512_FsFLD0SS:
8198 Ty = Type::getFloatTy(MF.getFunction().getContext());
8199 break;
8200 case X86::FsFLD0SD:
8201 case X86::AVX512_FsFLD0SD:
8202 Ty = Type::getDoubleTy(MF.getFunction().getContext());
8203 break;
8204 case X86::FsFLD0F128:
8205 case X86::AVX512_FsFLD0F128:
8206 Ty = Type::getFP128Ty(MF.getFunction().getContext());
8207 break;
8208 case X86::FsFLD0SH:
8209 case X86::AVX512_FsFLD0SH:
8210 Ty = Type::getHalfTy(MF.getFunction().getContext());
8211 break;
8212 case X86::AVX512_512_SETALLONES:
8213 IsAllOnes = true;
8214 [[fallthrough]];
8215 case X86::AVX512_512_SET0:
8216 Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
8217 16);
8218 break;
8219 case X86::AVX1_SETALLONES:
8220 case X86::AVX2_SETALLONES:
8221 IsAllOnes = true;
8222 [[fallthrough]];
8223 case X86::AVX512_256_SET0:
8224 case X86::AVX_SET0:
8225 Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
8226 8);
8227
8228 break;
8229 case X86::MMX_SET0:
8230 Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
8231 2);
8232 break;
8233 case X86::V_SETALLONES:
8234 IsAllOnes = true;
8235 [[fallthrough]];
8236 case X86::V_SET0:
8237 case X86::AVX512_128_SET0:
8238 Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
8239 4);
8240 break;
8241 }
8242
8243 const Constant *C =
8244 IsAllOnes ? Constant::getAllOnesValue(Ty) : Constant::getNullValue(Ty);
8245 unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
8246
8247 // Create operands to load from the constant pool entry.
8248 MOs.push_back(MachineOperand::CreateReg(PICBase, false));
8249 MOs.push_back(MachineOperand::CreateImm(1));
8250 MOs.push_back(MachineOperand::CreateReg(0, false));
8251 MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
8252 MOs.push_back(MachineOperand::CreateReg(0, false));
8253 break;
8254 }
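// Illustrative outcome (not from the upstream source): with the operands
// built above, folding V_SET0 into the second input of ANDPSrr on x86-64
// produces ANDPSrm with a RIP-relative constant-pool address, so the zero
// vector is rematerialized from memory instead of tying up a register.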
8255 case X86::VPBROADCASTBZ128rm:
8256 case X86::VPBROADCASTBZ256rm:
8257 case X86::VPBROADCASTBZrm:
8258 case X86::VBROADCASTF32X2Z256rm:
8259 case X86::VBROADCASTF32X2Zrm:
8260 case X86::VBROADCASTI32X2Z128rm:
8261 case X86::VBROADCASTI32X2Z256rm:
8262 case X86::VBROADCASTI32X2Zrm:
8263 // No instructions currently fuse with 8-bit or 32-bit x 2 broadcasts.
8264 return nullptr;
8265
8266#define FOLD_BROADCAST(SIZE) \
8267 MOs.append(LoadMI.operands_begin() + NumOps - X86::AddrNumOperands, \
8268 LoadMI.operands_begin() + NumOps); \
8269 return foldMemoryBroadcast(MF, MI, Ops[0], MOs, InsertPt, /*Size=*/SIZE, \
8270 /*AllowCommute=*/true);
8271 case X86::VPBROADCASTWZ128rm:
8272 case X86::VPBROADCASTWZ256rm:
8273 case X86::VPBROADCASTWZrm:
8274 FOLD_BROADCAST(16);
8275 case X86::VPBROADCASTDZ128rm:
8276 case X86::VPBROADCASTDZ256rm:
8277 case X86::VPBROADCASTDZrm:
8278 case X86::VBROADCASTSSZ128rm:
8279 case X86::VBROADCASTSSZ256rm:
8280 case X86::VBROADCASTSSZrm:
8281 FOLD_BROADCAST(32);
8282 case X86::VPBROADCASTQZ128rm:
8283 case X86::VPBROADCASTQZ256rm:
8284 case X86::VPBROADCASTQZrm:
8285 case X86::VBROADCASTSDZ256rm:
8286 case X86::VBROADCASTSDZrm:
8287 FOLD_BROADCAST(64);
8288 default: {
8289 if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF))
8290 return nullptr;
8291
8292 // Folding a normal load. Just copy the load's address operands.
8293 MOs.append(LoadMI.operands_begin() + NumOps - X86::AddrNumOperands,
8294 LoadMI.operands_begin() + NumOps);
8295 break;
8296 }
8297 }
8298 return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt,
8299 /*Size=*/0, Alignment, /*AllowCommute=*/true);
8300}
8301
8303X86InstrInfo::foldMemoryBroadcast(MachineFunction &MF, MachineInstr &MI,
8304 unsigned OpNum, ArrayRef<MachineOperand> MOs,
8306 unsigned BitsSize, bool AllowCommute) const {
8307
8308 if (auto *I = lookupBroadcastFoldTable(MI.getOpcode(), OpNum))
8309 return matchBroadcastSize(*I, BitsSize)
8310 ? fuseInst(MF, I->DstOp, OpNum, MOs, InsertPt, MI, *this)
8311 : nullptr;
8312
8313 if (AllowCommute) {
8314 // If the instruction and target operand are commutable, commute the
8315 // instruction and try again.
8316 unsigned CommuteOpIdx2 = commuteOperandsForFold(MI, OpNum);
8317 if (CommuteOpIdx2 == OpNum) {
8318 printFailMsgforFold(MI, OpNum);
8319 return nullptr;
8320 }
8321 MachineInstr *NewMI =
8322 foldMemoryBroadcast(MF, MI, CommuteOpIdx2, MOs, InsertPt, BitsSize,
8323 /*AllowCommute=*/false);
8324 if (NewMI)
8325 return NewMI;
8326 // Folding failed again - undo the commute before returning.
8327 commuteInstruction(MI, false, OpNum, CommuteOpIdx2);
8328 }
8329
8330 printFailMsgforFold(MI, OpNum);
8331 return nullptr;
8332}
8333
8334 static SmallVector<MachineMemOperand *, 2>
8335 extractLoadMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
8336 SmallVector<MachineMemOperand *, 2> LoadMMOs;
8337
8338 for (MachineMemOperand *MMO : MMOs) {
8339 if (!MMO->isLoad())
8340 continue;
8341
8342 if (!MMO->isStore()) {
8343 // Reuse the MMO.
8344 LoadMMOs.push_back(MMO);
8345 } else {
8346 // Clone the MMO and unset the store flag.
8347 LoadMMOs.push_back(MF.getMachineMemOperand(
8348 MMO, MMO->getFlags() & ~MachineMemOperand::MOStore));
8349 }
8350 }
8351
8352 return LoadMMOs;
8353}
8354
8355 static SmallVector<MachineMemOperand *, 2>
8356 extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
8357 SmallVector<MachineMemOperand *, 2> StoreMMOs;
8358
8359 for (MachineMemOperand *MMO : MMOs) {
8360 if (!MMO->isStore())
8361 continue;
8362
8363 if (!MMO->isLoad()) {
8364 // Reuse the MMO.
8365 StoreMMOs.push_back(MMO);
8366 } else {
8367 // Clone the MMO and unset the load flag.
8368 StoreMMOs.push_back(MF.getMachineMemOperand(
8369 MMO, MMO->getFlags() & ~MachineMemOperand::MOLoad));
8370 }
8371 }
8372
8373 return StoreMMOs;
8374}
8375
8376 static unsigned getBroadcastOpcode(const X86FoldTableEntry *I,
8377 const TargetRegisterClass *RC,
8378 const X86Subtarget &STI) {
8379 assert(STI.hasAVX512() && "Expected at least AVX512!");
8380 unsigned SpillSize = STI.getRegisterInfo()->getSpillSize(*RC);
8381 assert((SpillSize == 64 || STI.hasVLX()) &&
8382 "Can't broadcast less than 64 bytes without AVX512VL!");
8383
8384#define CASE_BCAST_TYPE_OPC(TYPE, OP16, OP32, OP64) \
8385 case TYPE: \
8386 switch (SpillSize) { \
8387 default: \
8388 llvm_unreachable("Unknown spill size"); \
8389 case 16: \
8390 return X86::OP16; \
8391 case 32: \
8392 return X86::OP32; \
8393 case 64: \
8394 return X86::OP64; \
8395 } \
8396 break;
8397
8398 switch (I->Flags & TB_BCAST_MASK) {
8399 default:
8400 llvm_unreachable("Unexpected broadcast type!");
8401 CASE_BCAST_TYPE_OPC(TB_BCAST_W, VPBROADCASTWZ128rm, VPBROADCASTWZ256rm,
8402 VPBROADCASTWZrm)
8403 CASE_BCAST_TYPE_OPC(TB_BCAST_D, VPBROADCASTDZ128rm, VPBROADCASTDZ256rm,
8404 VPBROADCASTDZrm)
8405 CASE_BCAST_TYPE_OPC(TB_BCAST_Q, VPBROADCASTQZ128rm, VPBROADCASTQZ256rm,
8406 VPBROADCASTQZrm)
8407 CASE_BCAST_TYPE_OPC(TB_BCAST_SH, VPBROADCASTWZ128rm, VPBROADCASTWZ256rm,
8408 VPBROADCASTWZrm)
8409 CASE_BCAST_TYPE_OPC(TB_BCAST_SS, VBROADCASTSSZ128rm, VBROADCASTSSZ256rm,
8410 VBROADCASTSSZrm)
8411 CASE_BCAST_TYPE_OPC(TB_BCAST_SD, VMOVDDUPZ128rm, VBROADCASTSDZ256rm,
8412 VBROADCASTSDZrm)
8413 }
8414}
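// Example (illustrative, not from the upstream source): a fold-table entry
// tagged TB_BCAST_D resolves to VPBROADCASTDZrm for a 64-byte spill size and
// to VPBROADCASTDZ128rm for a 16-byte spill size (the latter requiring
// AVX512VL, as the assertion above enforces).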
8415
8416 bool X86InstrInfo::unfoldMemoryOperand(
8417 MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad,
8418 bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const {
8419 const X86FoldTableEntry *I = lookupUnfoldTable(MI.getOpcode());
8420 if (I == nullptr)
8421 return false;
8422 unsigned Opc = I->DstOp;
8423 unsigned Index = I->Flags & TB_INDEX_MASK;
8424 bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
8425 bool FoldedStore = I->Flags & TB_FOLDED_STORE;
8426 if (UnfoldLoad && !FoldedLoad)
8427 return false;
8428 UnfoldLoad &= FoldedLoad;
8429 if (UnfoldStore && !FoldedStore)
8430 return false;
8431 UnfoldStore &= FoldedStore;
8432
8433 const MCInstrDesc &MCID = get(Opc);
8434
8435 const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
8436 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
8437 // TODO: Check if 32-byte or greater accesses are slow too?
8438 if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass &&
8439 Subtarget.isUnalignedMem16Slow())
8440 // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
8441 // conservatively assume the address is unaligned. That's bad for
8442 // performance.
8443 return false;
8444 SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
8445 SmallVector<MachineOperand, 2> BeforeOps;
8446 SmallVector<MachineOperand, 2> AfterOps;
8447 SmallVector<MachineOperand, 4> ImpOps;
8448 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
8449 MachineOperand &Op = MI.getOperand(i);
8450 if (i >= Index && i < Index + X86::AddrNumOperands)
8451 AddrOps.push_back(Op);
8452 else if (Op.isReg() && Op.isImplicit())
8453 ImpOps.push_back(Op);
8454 else if (i < Index)
8455 BeforeOps.push_back(Op);
8456 else if (i > Index)
8457 AfterOps.push_back(Op);
8458 }
8459
8460 // Emit the load or broadcast instruction.
8461 if (UnfoldLoad) {
8462 auto MMOs = extractLoadMMOs(MI.memoperands(), MF);
8463
8464 unsigned Opc;
8465 if (I->Flags & TB_BCAST_MASK) {
8466 Opc = getBroadcastOpcode(I, RC, Subtarget);
8467 } else {
8468 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
8469 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8470 Opc = getLoadRegOpcode(Reg, RC, isAligned, Subtarget);
8471 }
8472
8473 DebugLoc DL;
8474 MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), Reg);
8475 for (const MachineOperand &AddrOp : AddrOps)
8476 MIB.add(AddrOp);
8477 MIB.setMemRefs(MMOs);
8478 NewMIs.push_back(MIB);
8479
8480 if (UnfoldStore) {
8481 // Address operands cannot be marked isKill.
8482 for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
8483 MachineOperand &MO = NewMIs[0]->getOperand(i);
8484 if (MO.isReg())
8485 MO.setIsKill(false);
8486 }
8487 }
8488 }
8489
8490 // Emit the data processing instruction.
8491 MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI.getDebugLoc(), true);
8492 MachineInstrBuilder MIB(MF, DataMI);
8493
8494 if (FoldedStore)
8495 MIB.addReg(Reg, RegState::Define);
8496 for (MachineOperand &BeforeOp : BeforeOps)
8497 MIB.add(BeforeOp);
8498 if (FoldedLoad)
8499 MIB.addReg(Reg);
8500 for (MachineOperand &AfterOp : AfterOps)
8501 MIB.add(AfterOp);
8502 for (MachineOperand &ImpOp : ImpOps) {
8503 MIB.addReg(ImpOp.getReg(), getDefRegState(ImpOp.isDef()) |
8504 RegState::Implicit |
8505 getKillRegState(ImpOp.isKill()) |
8506 getDeadRegState(ImpOp.isDead()) |
8507 getUndefRegState(ImpOp.isUndef()));
8508 }
8509 // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
8510 switch (DataMI->getOpcode()) {
8511 default:
8512 break;
8513 case X86::CMP64ri32:
8514 case X86::CMP32ri:
8515 case X86::CMP16ri:
8516 case X86::CMP8ri: {
8517 MachineOperand &MO0 = DataMI->getOperand(0);
8518 MachineOperand &MO1 = DataMI->getOperand(1);
8519 if (MO1.isImm() && MO1.getImm() == 0) {
8520 unsigned NewOpc;
8521 switch (DataMI->getOpcode()) {
8522 default:
8523 llvm_unreachable("Unreachable!");
8524 case X86::CMP64ri32:
8525 NewOpc = X86::TEST64rr;
8526 break;
8527 case X86::CMP32ri:
8528 NewOpc = X86::TEST32rr;
8529 break;
8530 case X86::CMP16ri:
8531 NewOpc = X86::TEST16rr;
8532 break;
8533 case X86::CMP8ri:
8534 NewOpc = X86::TEST8rr;
8535 break;
8536 }
8537 DataMI->setDesc(get(NewOpc));
8538 MO1.ChangeToRegister(MO0.getReg(), false);
8539 }
8540 }
8541 }
8542 NewMIs.push_back(DataMI);
8543
8544 // Emit the store instruction.
8545 if (UnfoldStore) {
8546 const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
8547 auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
8548 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*DstRC), 16);
8549 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8550 unsigned Opc = getStoreRegOpcode(Reg, DstRC, isAligned, Subtarget);
8551 DebugLoc DL;
8552 MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
8553 for (const MachineOperand &AddrOp : AddrOps)
8554 MIB.add(AddrOp);
8555 MIB.addReg(Reg, RegState::Kill);
8556 MIB.setMemRefs(MMOs);
8557 NewMIs.push_back(MIB);
8558 }
8559
8560 return true;
8561}
8562
8563 bool X86InstrInfo::unfoldMemoryOperand(
8564 SelectionDAG &DAG, SDNode *N, SmallVectorImpl<SDNode *> &NewNodes) const {
8565 if (!N->isMachineOpcode())
8566 return false;
8567
8568 const X86FoldTableEntry *I = lookupUnfoldTable(N->getMachineOpcode());
8569 if (I == nullptr)
8570 return false;
8571 unsigned Opc = I->DstOp;
8572 unsigned Index = I->Flags & TB_INDEX_MASK;
8573 bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
8574 bool FoldedStore = I->Flags & TB_FOLDED_STORE;
8575 const MCInstrDesc &MCID = get(Opc);
8576 MachineFunction &MF = DAG.getMachineFunction();
8577 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
8578 const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
8579 unsigned NumDefs = MCID.NumDefs;
8580 std::vector<SDValue> AddrOps;
8581 std::vector<SDValue> BeforeOps;
8582 std::vector<SDValue> AfterOps;
8583 SDLoc dl(N);
8584 unsigned NumOps = N->getNumOperands();
8585 for (unsigned i = 0; i != NumOps - 1; ++i) {
8586 SDValue Op = N->getOperand(i);
8587 if (i >= Index - NumDefs && i < Index - NumDefs + X86::AddrNumOperands)
8588 AddrOps.push_back(Op);
8589 else if (i < Index - NumDefs)
8590 BeforeOps.push_back(Op);
8591 else if (i > Index - NumDefs)
8592 AfterOps.push_back(Op);
8593 }
8594 SDValue Chain = N->getOperand(NumOps - 1);
8595 AddrOps.push_back(Chain);
8596
8597 // Emit the load instruction.
8598 SDNode *Load = nullptr;
8599 if (FoldedLoad) {
8600 EVT VT = *TRI.legalclasstypes_begin(*RC);
8601 auto MMOs = extractLoadMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
8602 if (MMOs.empty() && RC == &X86::VR128RegClass &&
8603 Subtarget.isUnalignedMem16Slow())
8604 // Do not introduce a slow unaligned load.
8605 return false;
8606 // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
8607 // memory access is slow above.
8608
8609 unsigned Opc;
8610 if (I->Flags & TB_BCAST_MASK) {
8611 Opc = getBroadcastOpcode(I, RC, Subtarget);
8612 } else {
8613 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
8614 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8615 Opc = getLoadRegOpcode(0, RC, isAligned, Subtarget);
8616 }
8617
8618 Load = DAG.getMachineNode(Opc, dl, VT, MVT::Other, AddrOps);
8619 NewNodes.push_back(Load);
8620
8621 // Preserve memory reference information.
8622 DAG.setNodeMemRefs(cast<MachineSDNode>(Load), MMOs);
8623 }
8624
8625 // Emit the data processing instruction.
8626 std::vector<EVT> VTs;
8627 const TargetRegisterClass *DstRC = nullptr;
8628 if (MCID.getNumDefs() > 0) {
8629 DstRC = getRegClass(MCID, 0, &RI, MF);
8630 VTs.push_back(*TRI.legalclasstypes_begin(*DstRC));
8631 }
8632 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
8633 EVT VT = N->getValueType(i);
8634 if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
8635 VTs.push_back(VT);
8636 }
8637 if (Load)
8638 BeforeOps.push_back(SDValue(Load, 0));
8639 llvm::append_range(BeforeOps, AfterOps);
8640 // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
8641 switch (Opc) {
8642 default:
8643 break;
8644 case X86::CMP64ri32:
8645 case X86::CMP32ri:
8646 case X86::CMP16ri:
8647 case X86::CMP8ri:
8648 if (isNullConstant(BeforeOps[1])) {
8649 switch (Opc) {
8650 default:
8651 llvm_unreachable("Unreachable!");
8652 case X86::CMP64ri32:
8653 Opc = X86::TEST64rr;
8654 break;
8655 case X86::CMP32ri:
8656 Opc = X86::TEST32rr;
8657 break;
8658 case X86::CMP16ri:
8659 Opc = X86::TEST16rr;
8660 break;
8661 case X86::CMP8ri:
8662 Opc = X86::TEST8rr;
8663 break;
8664 }
8665 BeforeOps[1] = BeforeOps[0];
8666 }
8667 }
8668 SDNode *NewNode = DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
8669 NewNodes.push_back(NewNode);
8670
8671 // Emit the store instruction.
8672 if (FoldedStore) {
8673 AddrOps.pop_back();
8674 AddrOps.push_back(SDValue(NewNode, 0));
8675 AddrOps.push_back(Chain);
8676 auto MMOs = extractStoreMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
8677 if (MMOs.empty() && RC == &X86::VR128RegClass &&
8678 Subtarget.isUnalignedMem16Slow())
8679 // Do not introduce a slow unaligned store.
8680 return false;
8681 // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
8682 // memory access is slow above.
8683 unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
8684 bool isAligned = !MMOs.empty() && MMOs.front()->getAlign() >= Alignment;
8685 SDNode *Store =
8686 DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
8687 dl, MVT::Other, AddrOps);
8688 NewNodes.push_back(Store);
8689
8690 // Preserve memory reference information.
8691 DAG.setNodeMemRefs(cast<MachineSDNode>(Store), MMOs);
8692 }
8693
8694 return true;
8695}
8696
8697unsigned
8698X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad,
8699 bool UnfoldStore,
8700 unsigned *LoadRegIndex) const {
8701 const X86FoldTableEntry *I = lookupUnfoldTable(Opc);
8702 if (I == nullptr)
8703 return 0;
8704 bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
8705 bool FoldedStore = I->Flags & TB_FOLDED_STORE;
8706 if (UnfoldLoad && !FoldedLoad)
8707 return 0;
8708 if (UnfoldStore && !FoldedStore)
8709 return 0;
8710 if (LoadRegIndex)
8711 *LoadRegIndex = I->Flags & TB_INDEX_MASK;
8712 return I->DstOp;
8713}
8714
8715 bool X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
8716 int64_t &Offset1,
8717 int64_t &Offset2) const {
8718 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
8719 return false;
8720
8721 auto IsLoadOpcode = [&](unsigned Opcode) {
8722 switch (Opcode) {
8723 default:
8724 return false;
8725 case X86::MOV8rm:
8726 case X86::MOV16rm:
8727 case X86::MOV32rm:
8728 case X86::MOV64rm:
8729 case X86::LD_Fp32m:
8730 case X86::LD_Fp64m:
8731 case X86::LD_Fp80m:
8732 case X86::MOVSSrm:
8733 case X86::MOVSSrm_alt:
8734 case X86::MOVSDrm:
8735 case X86::MOVSDrm_alt:
8736 case X86::MMX_MOVD64rm:
8737 case X86::MMX_MOVQ64rm:
8738 case X86::MOVAPSrm:
8739 case X86::MOVUPSrm:
8740 case X86::MOVAPDrm:
8741 case X86::MOVUPDrm:
8742 case X86::MOVDQArm:
8743 case X86::MOVDQUrm:
8744 // AVX load instructions
8745 case X86::VMOVSSrm:
8746 case X86::VMOVSSrm_alt:
8747 case X86::VMOVSDrm:
8748 case X86::VMOVSDrm_alt:
8749 case X86::VMOVAPSrm:
8750 case X86::VMOVUPSrm:
8751 case X86::VMOVAPDrm:
8752 case X86::VMOVUPDrm:
8753 case X86::VMOVDQArm:
8754 case X86::VMOVDQUrm:
8755 case X86::VMOVAPSYrm:
8756 case X86::VMOVUPSYrm:
8757 case X86::VMOVAPDYrm:
8758 case X86::VMOVUPDYrm:
8759 case X86::VMOVDQAYrm:
8760 case X86::VMOVDQUYrm:
8761 // AVX512 load instructions
8762 case X86::VMOVSSZrm:
8763 case X86::VMOVSSZrm_alt:
8764 case X86::VMOVSDZrm:
8765 case X86::VMOVSDZrm_alt:
8766 case X86::VMOVAPSZ128rm:
8767 case X86::VMOVUPSZ128rm:
8768 case X86::VMOVAPSZ128rm_NOVLX:
8769 case X86::VMOVUPSZ128rm_NOVLX:
8770 case X86::VMOVAPDZ128rm:
8771 case X86::VMOVUPDZ128rm:
8772 case X86::VMOVDQU8Z128rm:
8773 case X86::VMOVDQU16Z128rm:
8774 case X86::VMOVDQA32Z128rm:
8775 case X86::VMOVDQU32Z128rm:
8776 case X86::VMOVDQA64Z128rm:
8777 case X86::VMOVDQU64Z128rm:
8778 case X86::VMOVAPSZ256rm:
8779 case X86::VMOVUPSZ256rm:
8780 case X86::VMOVAPSZ256rm_NOVLX:
8781 case X86::VMOVUPSZ256rm_NOVLX:
8782 case X86::VMOVAPDZ256rm:
8783 case X86::VMOVUPDZ256rm:
8784 case X86::VMOVDQU8Z256rm:
8785 case X86::VMOVDQU16Z256rm:
8786 case X86::VMOVDQA32Z256rm:
8787 case X86::VMOVDQU32Z256rm:
8788 case X86::VMOVDQA64Z256rm:
8789 case X86::VMOVDQU64Z256rm:
8790 case X86::VMOVAPSZrm:
8791 case X86::VMOVUPSZrm:
8792 case X86::VMOVAPDZrm:
8793 case X86::VMOVUPDZrm:
8794 case X86::VMOVDQU8Zrm:
8795 case X86::VMOVDQU16Zrm:
8796 case X86::VMOVDQA32Zrm:
8797 case X86::VMOVDQU32Zrm:
8798 case X86::VMOVDQA64Zrm:
8799 case X86::VMOVDQU64Zrm:
8800 case X86::KMOVBkm:
8801 case X86::KMOVBkm_EVEX:
8802 case X86::KMOVWkm:
8803 case X86::KMOVWkm_EVEX:
8804 case X86::KMOVDkm:
8805 case X86::KMOVDkm_EVEX:
8806 case X86::KMOVQkm:
8807 case X86::KMOVQkm_EVEX:
8808 return true;
8809 }
8810 };
8811
8812 if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
8813 !IsLoadOpcode(Load2->getMachineOpcode()))
8814 return false;
8815
8816 // Lambda to check if both the loads have the same value for an operand index.
8817 auto HasSameOp = [&](int I) {
8818 return Load1->getOperand(I) == Load2->getOperand(I);
8819 };
8820
8821 // All operands except the displacement should match.
8822 if (!HasSameOp(X86::AddrBaseReg) || !HasSameOp(X86::AddrScaleAmt) ||
8823 !HasSameOp(X86::AddrIndexReg) || !HasSameOp(X86::AddrSegmentReg))
8824 return false;
8825
8826 // Chain Operand must be the same.
8827 if (!HasSameOp(5))
8828 return false;
8829
8830 // Now let's examine if the displacements are constants.
8831 auto Disp1 = dyn_cast<ConstantSDNode>(Load1->getOperand(X86::AddrDisp));
8832 auto Disp2 = dyn_cast<ConstantSDNode>(Load2->getOperand(X86::AddrDisp));
8833 if (!Disp1 || !Disp2)
8834 return false;
8835
8836 Offset1 = Disp1->getSExtValue();
8837 Offset2 = Disp2->getSExtValue();
8838 return true;
8839}
8840
8841 bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
8842 int64_t Offset1, int64_t Offset2,
8843 unsigned NumLoads) const {
8844 assert(Offset2 > Offset1);
8845 if ((Offset2 - Offset1) / 8 > 64)
8846 return false;
8847
8848 unsigned Opc1 = Load1->getMachineOpcode();
8849 unsigned Opc2 = Load2->getMachineOpcode();
8850 if (Opc1 != Opc2)
8851 return false; // FIXME: overly conservative?
8852
8853 switch (Opc1) {
8854 default:
8855 break;
8856 case X86::LD_Fp32m:
8857 case X86::LD_Fp64m:
8858 case X86::LD_Fp80m:
8859 case X86::MMX_MOVD64rm:
8860 case X86::MMX_MOVQ64rm:
8861 return false;
8862 }
8863
8864 EVT VT = Load1->getValueType(0);
8865 switch (VT.getSimpleVT().SimpleTy) {
8866 default:
8867 // XMM registers. In 64-bit mode we can be a bit more aggressive since we
8868 // have 16 of them to play with.
8869 if (Subtarget.is64Bit()) {
8870 if (NumLoads >= 3)
8871 return false;
8872 } else if (NumLoads) {
8873 return false;
8874 }
8875 break;
8876 case MVT::i8:
8877 case MVT::i16:
8878 case MVT::i32:
8879 case MVT::i64:
8880 case MVT::f32:
8881 case MVT::f64:
8882 if (NumLoads)
8883 return false;
8884 break;
8885 }
8886
8887 return true;
8888}
8889
8890 bool X86InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
8891 const MachineBasicBlock *MBB,
8892 const MachineFunction &MF) const {
8893
8894 // ENDBR instructions should not be scheduled around.
8895 unsigned Opcode = MI.getOpcode();
8896 if (Opcode == X86::ENDBR64 || Opcode == X86::ENDBR32 ||
8897 Opcode == X86::PLDTILECFGV)
8898 return true;
8899
8900 // Frame setup and destroy can't be scheduled around.
8901 if (MI.getFlag(MachineInstr::FrameSetup) ||
8902 MI.getFlag(MachineInstr::FrameDestroy))
8903 return true;
8904
8905 return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
8906}
8907
8908 bool X86InstrInfo::reverseBranchCondition(
8909 SmallVectorImpl<MachineOperand> &Cond) const {
8910 assert(Cond.size() == 1 && "Invalid X86 branch condition!");
8911 X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
8912 Cond[0].setImm(X86::GetOppositeBranchCondition(CC));
8913 return false;
8914}
8915
8916 bool X86InstrInfo::isSafeToMoveRegClassDefs(
8917 const TargetRegisterClass *RC) const {
8918 // FIXME: Return false for x87 stack register classes for now. We can't
8919 // allow any loads of these registers before FpGet_ST0_80.
8920 return !(RC == &X86::CCRRegClass || RC == &X86::DFCCRRegClass ||
8921 RC == &X86::RFP32RegClass || RC == &X86::RFP64RegClass ||
8922 RC == &X86::RFP80RegClass);
8923}
8924
8925 /// Return a virtual register initialized with the
8926 /// global base register value. Output instructions required to
8927/// initialize the register in the function entry block, if necessary.
8928///
8929/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
8930///
8931 unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
8932 X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
8933 Register GlobalBaseReg = X86FI->getGlobalBaseReg();
8934 if (GlobalBaseReg != 0)
8935 return GlobalBaseReg;
8936
8937 // Create the register. The code to initialize it is inserted
8938 // later, by the CGBR pass (below).
8939 MachineRegisterInfo &RegInfo = MF->getRegInfo();
8940 GlobalBaseReg = RegInfo.createVirtualRegister(
8941 Subtarget.is64Bit() ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass);
8942 X86FI->setGlobalBaseReg(GlobalBaseReg);
8943 return GlobalBaseReg;
8944}
8945
8946// FIXME: Some shuffle and unpack instructions have equivalents in different
8947// domains, but they require a bit more work than just switching opcodes.
8948
8949static const uint16_t *lookup(unsigned opcode, unsigned domain,
8950 ArrayRef<uint16_t[3]> Table) {
8951 for (const uint16_t(&Row)[3] : Table)
8952 if (Row[domain - 1] == opcode)
8953 return Row;
8954 return nullptr;
8955}
8956
8957static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain,
8958 ArrayRef<uint16_t[4]> Table) {
8959 // If this is the integer domain make sure to check both integer columns.
8960 for (const uint16_t(&Row)[4] : Table)
8961 if (Row[domain - 1] == opcode || (domain == 3 && Row[3] == opcode))
8962 return Row;
8963 return nullptr;
8964}
8965
8966// Helper to attempt to widen/narrow blend masks.
8967static bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth,
8968 unsigned NewWidth, unsigned *pNewMask = nullptr) {
8969 assert(((OldWidth % NewWidth) == 0 || (NewWidth % OldWidth) == 0) &&
8970 "Illegal blend mask scale");
8971 unsigned NewMask = 0;
8972
8973 if ((OldWidth % NewWidth) == 0) {
8974 unsigned Scale = OldWidth / NewWidth;
8975 unsigned SubMask = (1u << Scale) - 1;
8976 for (unsigned i = 0; i != NewWidth; ++i) {
8977 unsigned Sub = (OldMask >> (i * Scale)) & SubMask;
8978 if (Sub == SubMask)
8979 NewMask |= (1u << i);
8980 else if (Sub != 0x0)
8981 return false;
8982 }
8983 } else {
8984 unsigned Scale = NewWidth / OldWidth;
8985 unsigned SubMask = (1u << Scale) - 1;
8986 for (unsigned i = 0; i != OldWidth; ++i) {
8987 if (OldMask & (1 << i)) {
8988 NewMask |= (SubMask << (i * Scale));
8989 }
8990 }
8991 }
8992
8993 if (pNewMask)
8994 *pNewMask = NewMask;
8995 return true;
8996}
8997
8998uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
8999  unsigned Opcode = MI.getOpcode();
9000 unsigned NumOperands = MI.getDesc().getNumOperands();
9001
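  // The result is a bitmask of legal execution domains: bit 1 = PackedSingle,
  // bit 2 = PackedDouble, bit 3 = PackedInt, so 0xe means all three and 0x6
  // means just the two floating-point domains. getExecutionDomain below uses
  // the same encoding.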
9002 auto GetBlendDomains = [&](unsigned ImmWidth, bool Is256) {
9003 uint16_t validDomains = 0;
9004 if (MI.getOperand(NumOperands - 1).isImm()) {
9005 unsigned Imm = MI.getOperand(NumOperands - 1).getImm();
9006 if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4))
9007 validDomains |= 0x2; // PackedSingle
9008 if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2))
9009 validDomains |= 0x4; // PackedDouble
9010 if (!Is256 || Subtarget.hasAVX2())
9011 validDomains |= 0x8; // PackedInt
9012 }
9013 return validDomains;
9014 };
9015
9016 switch (Opcode) {
9017 case X86::BLENDPDrmi:
9018 case X86::BLENDPDrri:
9019 case X86::VBLENDPDrmi:
9020 case X86::VBLENDPDrri:
9021 return GetBlendDomains(2, false);
9022 case X86::VBLENDPDYrmi:
9023 case X86::VBLENDPDYrri:
9024 return GetBlendDomains(4, true);
9025 case X86::BLENDPSrmi:
9026 case X86::BLENDPSrri:
9027 case X86::VBLENDPSrmi:
9028 case X86::VBLENDPSrri:
9029 case X86::VPBLENDDrmi:
9030 case X86::VPBLENDDrri:
9031 return GetBlendDomains(4, false);
9032 case X86::VBLENDPSYrmi:
9033 case X86::VBLENDPSYrri:
9034 case X86::VPBLENDDYrmi:
9035 case X86::VPBLENDDYrri:
9036 return GetBlendDomains(8, true);
9037 case X86::PBLENDWrmi:
9038 case X86::PBLENDWrri:
9039 case X86::VPBLENDWrmi:
9040 case X86::VPBLENDWrri:
9041 // Treat VPBLENDWY as a 128-bit vector as it repeats the lo/hi masks.
9042 case X86::VPBLENDWYrmi:
9043 case X86::VPBLENDWYrri:
9044 return GetBlendDomains(8, false);
9045 case X86::VPANDDZ128rr:
9046 case X86::VPANDDZ128rm:
9047 case X86::VPANDDZ256rr:
9048 case X86::VPANDDZ256rm:
9049 case X86::VPANDQZ128rr:
9050 case X86::VPANDQZ128rm:
9051 case X86::VPANDQZ256rr:
9052 case X86::VPANDQZ256rm:
9053 case X86::VPANDNDZ128rr:
9054 case X86::VPANDNDZ128rm:
9055 case X86::VPANDNDZ256rr:
9056 case X86::VPANDNDZ256rm:
9057 case X86::VPANDNQZ128rr:
9058 case X86::VPANDNQZ128rm:
9059 case X86::VPANDNQZ256rr:
9060 case X86::VPANDNQZ256rm:
9061 case X86::VPORDZ128rr:
9062 case X86::VPORDZ128rm:
9063 case X86::VPORDZ256rr:
9064 case X86::VPORDZ256rm:
9065 case X86::VPORQZ128rr:
9066 case X86::VPORQZ128rm:
9067 case X86::VPORQZ256rr:
9068 case X86::VPORQZ256rm:
9069 case X86::VPXORDZ128rr:
9070 case X86::VPXORDZ128rm:
9071 case X86::VPXORDZ256rr:
9072 case X86::VPXORDZ256rm:
9073 case X86::VPXORQZ128rr:
9074 case X86::VPXORQZ128rm:
9075 case X86::VPXORQZ256rr:
9076 case X86::VPXORQZ256rm:
9077 // If we don't have DQI see if we can still switch from an EVEX integer
9078 // instruction to a VEX floating point instruction.
9079 if (Subtarget.hasDQI())
9080 return 0;
9081
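    // The VEX-encoded floating-point replacements can only address XMM0-XMM15,
    // so give up if any operand lives in an extended (EVEX-only) register.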
9082 if (RI.getEncodingValue(MI.getOperand(0).getReg()) >= 16)
9083 return 0;
9084 if (RI.getEncodingValue(MI.getOperand(1).getReg()) >= 16)
9085 return 0;
9086 // Register forms will have 3 operands. Memory form will have more.
9087 if (NumOperands == 3 &&
9088 RI.getEncodingValue(MI.getOperand(2).getReg()) >= 16)
9089 return 0;
9090
9091 // All domains are valid.
9092 return 0xe;
9093 case X86::MOVHLPSrr:
9094 // We can swap domains when both inputs are the same register.
9095 // FIXME: This doesn't catch all the cases we would like. If the input
9096 // register isn't KILLed by the instruction, the two address instruction
9097 // pass puts a COPY on one input. The other input uses the original
9098 // register. This prevents the same physical register from being used by
9099 // both inputs.
9100 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
9101 MI.getOperand(0).getSubReg() == 0 &&
9102 MI.getOperand(1).getSubReg() == 0 && MI.getOperand(2).getSubReg() == 0)
9103 return 0x6;
9104 return 0;
9105 case X86::SHUFPDrri:
9106 return 0x6;
9107 }
9108 return 0;
9109}
9110
9111#include "X86ReplaceableInstrs.def"
9112
9113bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
9114                                            unsigned Domain) const {
9115 assert(Domain > 0 && Domain < 4 && "Invalid execution domain");
9116 uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
9117 assert(dom && "Not an SSE instruction");
9118
9119 unsigned Opcode = MI.getOpcode();
9120 unsigned NumOperands = MI.getDesc().getNumOperands();
9121
9122 auto SetBlendDomain = [&](unsigned ImmWidth, bool Is256) {
9123 if (MI.getOperand(NumOperands - 1).isImm()) {
9124 unsigned Imm = MI.getOperand(NumOperands - 1).getImm() & 255;
9125 Imm = (ImmWidth == 16 ? ((Imm << 8) | Imm) : Imm);
9126 unsigned NewImm = Imm;
9127
9128 const uint16_t *table = lookup(Opcode, dom, ReplaceableBlendInstrs);
9129 if (!table)
9130 table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
9131
9132 if (Domain == 1) { // PackedSingle
9133 AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
9134 } else if (Domain == 2) { // PackedDouble
9135 AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2, &NewImm);
9136 } else if (Domain == 3) { // PackedInt
9137 if (Subtarget.hasAVX2()) {
9138 // If we are already VPBLENDW use that, else use VPBLENDD.
9139 if ((ImmWidth / (Is256 ? 2 : 1)) != 8) {
9140 table = lookup(Opcode, dom, ReplaceableBlendAVX2Instrs);
9141 AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
9142 }
9143 } else {
9144 assert(!Is256 && "128-bit vector expected");
9145 AdjustBlendMask(Imm, ImmWidth, 8, &NewImm);
9146 }
9147 }
9148
9149 assert(table && table[Domain - 1] && "Unknown domain op");
9150 MI.setDesc(get(table[Domain - 1]));
9151 MI.getOperand(NumOperands - 1).setImm(NewImm & 255);
9152 }
9153 return true;
9154 };
9155
9156 switch (Opcode) {
9157 case X86::BLENDPDrmi:
9158 case X86::BLENDPDrri:
9159 case X86::VBLENDPDrmi:
9160 case X86::VBLENDPDrri:
9161 return SetBlendDomain(2, false);
9162 case X86::VBLENDPDYrmi:
9163 case X86::VBLENDPDYrri:
9164 return SetBlendDomain(4, true);
9165 case X86::BLENDPSrmi:
9166 case X86::BLENDPSrri:
9167 case X86::VBLENDPSrmi:
9168 case X86::VBLENDPSrri:
9169 case X86::VPBLENDDrmi:
9170 case X86::VPBLENDDrri:
9171 return SetBlendDomain(4, false);
9172 case X86::VBLENDPSYrmi:
9173 case X86::VBLENDPSYrri:
9174 case X86::VPBLENDDYrmi:
9175 case X86::VPBLENDDYrri:
9176 return SetBlendDomain(8, true);
9177 case X86::PBLENDWrmi:
9178 case X86::PBLENDWrri:
9179 case X86::VPBLENDWrmi:
9180 case X86::VPBLENDWrri:
9181 return SetBlendDomain(8, false);
9182 case X86::VPBLENDWYrmi:
9183 case X86::VPBLENDWYrri:
9184 return SetBlendDomain(16, true);
9185 case X86::VPANDDZ128rr:
9186 case X86::VPANDDZ128rm:
9187 case X86::VPANDDZ256rr:
9188 case X86::VPANDDZ256rm:
9189 case X86::VPANDQZ128rr:
9190 case X86::VPANDQZ128rm:
9191 case X86::VPANDQZ256rr:
9192 case X86::VPANDQZ256rm:
9193 case X86::VPANDNDZ128rr:
9194 case X86::VPANDNDZ128rm:
9195 case X86::VPANDNDZ256rr:
9196 case X86::VPANDNDZ256rm:
9197 case X86::VPANDNQZ128rr:
9198 case X86::VPANDNQZ128rm:
9199 case X86::VPANDNQZ256rr:
9200 case X86::VPANDNQZ256rm:
9201 case X86::VPORDZ128rr:
9202 case X86::VPORDZ128rm:
9203 case X86::VPORDZ256rr:
9204 case X86::VPORDZ256rm:
9205 case X86::VPORQZ128rr:
9206 case X86::VPORQZ128rm:
9207 case X86::VPORQZ256rr:
9208 case X86::VPORQZ256rm:
9209 case X86::VPXORDZ128rr:
9210 case X86::VPXORDZ128rm:
9211 case X86::VPXORDZ256rr:
9212 case X86::VPXORDZ256rm:
9213 case X86::VPXORQZ128rr:
9214 case X86::VPXORQZ128rm:
9215 case X86::VPXORQZ256rr:
9216 case X86::VPXORQZ256rm: {
9217 // Without DQI, convert EVEX instructions to VEX instructions.
9218 if (Subtarget.hasDQI())
9219 return false;
9220
9221 const uint16_t *table =
9222 lookupAVX512(MI.getOpcode(), dom, ReplaceableCustomAVX512LogicInstrs);
9223 assert(table && "Instruction not found in table?");
9224 // Don't change integer Q instructions to D instructions and
9225  // use D instructions if we started with a PS instruction.
9226 if (Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
9227 Domain = 4;
9228 MI.setDesc(get(table[Domain - 1]));
9229 return true;
9230 }
9231 case X86::UNPCKHPDrr:
9232 case X86::MOVHLPSrr:
9233 // We just need to commute the instruction which will switch the domains.
9234 if (Domain != dom && Domain != 3 &&
9235 MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
9236 MI.getOperand(0).getSubReg() == 0 &&
9237 MI.getOperand(1).getSubReg() == 0 &&
9238 MI.getOperand(2).getSubReg() == 0) {
9239 commuteInstruction(MI, false);
9240 return true;
9241 }
9242 // We must always return true for MOVHLPSrr.
9243 if (Opcode == X86::MOVHLPSrr)
9244 return true;
9245 break;
9246 case X86::SHUFPDrri: {
9247 if (Domain == 1) {
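      // Convert to SHUFPS: the base mask 0x44 selects the low 64 bits of each
      // source; OR-ing in 0x0a or 0xa0 switches the corresponding pair of
      // dwords to the high 64 bits. E.g. SHUFPD imm 0x1 becomes SHUFPS imm
      // 0x4e (high half of src1, low half of src2).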
9248 unsigned Imm = MI.getOperand(3).getImm();
9249 unsigned NewImm = 0x44;
9250 if (Imm & 1)
9251 NewImm |= 0x0a;
9252 if (Imm & 2)
9253 NewImm |= 0xa0;
9254 MI.getOperand(3).setImm(NewImm);
9255 MI.setDesc(get(X86::SHUFPSrri));
9256 }
9257 return true;
9258 }
9259 }
9260 return false;
9261}
9262
9263std::pair<uint16_t, uint16_t>
9264X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
9265  uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
9266 unsigned opcode = MI.getOpcode();
9267 uint16_t validDomains = 0;
9268 if (domain) {
9269 // Attempt to match for custom instructions.
9270 validDomains = getExecutionDomainCustom(MI);
9271 if (validDomains)
9272 return std::make_pair(domain, validDomains);
9273
9274 if (lookup(opcode, domain, ReplaceableInstrs)) {
9275 validDomains = 0xe;
9276 } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
9277 validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
9278 } else if (lookup(opcode, domain, ReplaceableInstrsFP)) {
9279 validDomains = 0x6;
9280 } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
9281      // Insert/extract instructions should only affect the domain if AVX2
9282 // is enabled.
9283 if (!Subtarget.hasAVX2())
9284 return std::make_pair(0, 0);
9285 validDomains = 0xe;
9286 } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
9287 validDomains = 0xe;
9288 } else if (Subtarget.hasDQI() &&
9289 lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQ)) {
9290 validDomains = 0xe;
9291 } else if (Subtarget.hasDQI()) {
9292 if (const uint16_t *table =
9293 lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQMasked)) {
9294 if (domain == 1 || (domain == 3 && table[3] == opcode))
9295 validDomains = 0xa;
9296 else
9297 validDomains = 0xc;
9298 }
9299 }
9300 }
9301 return std::make_pair(domain, validDomains);
9302}
9303
9304void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
9305  assert(Domain > 0 && Domain < 4 && "Invalid execution domain");
9306 uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
9307 assert(dom && "Not an SSE instruction");
9308
9309 // Attempt to match for custom instructions.
9310  if (setExecutionDomainCustom(MI, Domain))
9311    return;
9312
9313 const uint16_t *table = lookup(MI.getOpcode(), dom, ReplaceableInstrs);
9314 if (!table) { // try the other table
9315 assert((Subtarget.hasAVX2() || Domain < 3) &&
9316 "256-bit vector operations only available in AVX2");
9317 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
9318 }
9319 if (!table) { // try the FP table
9320 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsFP);
9321 assert((!table || Domain < 3) &&
9322 "Can only select PackedSingle or PackedDouble");
9323 }
9324 if (!table) { // try the other table
9325 assert(Subtarget.hasAVX2() &&
9326 "256-bit insert/extract only available in AVX2");
9327 table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract);
9328 }
9329 if (!table) { // try the AVX512 table
9330 assert(Subtarget.hasAVX512() && "Requires AVX-512");
9331 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512);
9332 // Don't change integer Q instructions to D instructions.
9333 if (table && Domain == 3 && table[3] == MI.getOpcode())
9334 Domain = 4;
9335 }
9336 if (!table) { // try the AVX512DQ table
9337 assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
9338 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQ);
9339 // Don't change integer Q instructions to D instructions and
9340 // use D instructions if we started with a PS instruction.
9341 if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
9342 Domain = 4;
9343 }
9344 if (!table) { // try the AVX512DQMasked table
9345 assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
9346 table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQMasked);
9347 if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
9348 Domain = 4;
9349 }
9350 assert(table && "Cannot change domain");
9351 MI.setDesc(get(table[Domain - 1]));
9352}
9353
9354void X86InstrInfo::insertNoop(MachineBasicBlock &MBB,
9355                              MachineBasicBlock::iterator MI) const {
9356  DebugLoc DL;
9357 BuildMI(MBB, MI, DL, get(X86::NOOP));
9358}
9359
9360/// Return the noop instruction to use for a noop.
9361MCInst X86InstrInfo::getNop() const {
9362  MCInst Nop;
9363 Nop.setOpcode(X86::NOOP);
9364 return Nop;
9365}
9366
9367bool X86InstrInfo::isHighLatencyDef(int opc) const {
9368  switch (opc) {
9369 default:
9370 return false;
9371 case X86::DIVPDrm:
9372 case X86::DIVPDrr:
9373 case X86::DIVPSrm:
9374 case X86::DIVPSrr:
9375 case X86::DIVSDrm:
9376 case X86::DIVSDrm_Int:
9377 case X86::DIVSDrr:
9378 case X86::DIVSDrr_Int:
9379 case X86::DIVSSrm:
9380 case X86::DIVSSrm_Int:
9381 case X86::DIVSSrr:
9382 case X86::DIVSSrr_Int:
9383 case X86::SQRTPDm:
9384 case X86::SQRTPDr:
9385 case X86::SQRTPSm:
9386 case X86::SQRTPSr:
9387 case X86::SQRTSDm:
9388 case X86::SQRTSDm_Int:
9389 case X86::SQRTSDr:
9390 case X86::SQRTSDr_Int:
9391 case X86::SQRTSSm:
9392 case X86::SQRTSSm_Int:
9393 case X86::SQRTSSr:
9394 case X86::SQRTSSr_Int:
9395 // AVX instructions with high latency
9396 case X86::VDIVPDrm:
9397 case X86::VDIVPDrr:
9398 case X86::VDIVPDYrm:
9399 case X86::VDIVPDYrr:
9400 case X86::VDIVPSrm:
9401 case X86::VDIVPSrr:
9402 case X86::VDIVPSYrm:
9403 case X86::VDIVPSYrr:
9404 case X86::VDIVSDrm:
9405 case X86::VDIVSDrm_Int:
9406 case X86::VDIVSDrr:
9407 case X86::VDIVSDrr_Int:
9408 case X86::VDIVSSrm:
9409 case X86::VDIVSSrm_Int:
9410 case X86::VDIVSSrr:
9411 case X86::VDIVSSrr_Int:
9412 case X86::VSQRTPDm:
9413 case X86::VSQRTPDr:
9414 case X86::VSQRTPDYm:
9415 case X86::VSQRTPDYr:
9416 case X86::VSQRTPSm:
9417 case X86::VSQRTPSr:
9418 case X86::VSQRTPSYm:
9419 case X86::VSQRTPSYr:
9420 case X86::VSQRTSDm:
9421 case X86::VSQRTSDm_Int:
9422 case X86::VSQRTSDr:
9423 case X86::VSQRTSDr_Int:
9424 case X86::VSQRTSSm:
9425 case X86::VSQRTSSm_Int:
9426 case X86::VSQRTSSr:
9427 case X86::VSQRTSSr_Int:
9428 // AVX512 instructions with high latency
9429 case X86::VDIVPDZ128rm:
9430 case X86::VDIVPDZ128rmb:
9431 case X86::VDIVPDZ128rmbk:
9432 case X86::VDIVPDZ128rmbkz:
9433 case X86::VDIVPDZ128rmk:
9434 case X86::VDIVPDZ128rmkz:
9435 case X86::VDIVPDZ128rr:
9436 case X86::VDIVPDZ128rrk:
9437 case X86::VDIVPDZ128rrkz:
9438 case X86::VDIVPDZ256rm:
9439 case X86::VDIVPDZ256rmb:
9440 case X86::VDIVPDZ256rmbk:
9441 case X86::VDIVPDZ256rmbkz:
9442 case X86::VDIVPDZ256rmk:
9443 case X86::VDIVPDZ256rmkz:
9444 case X86::VDIVPDZ256rr:
9445 case X86::VDIVPDZ256rrk:
9446 case X86::VDIVPDZ256rrkz:
9447 case X86::VDIVPDZrrb:
9448 case X86::VDIVPDZrrbk:
9449 case X86::VDIVPDZrrbkz:
9450 case X86::VDIVPDZrm:
9451 case X86::VDIVPDZrmb:
9452 case X86::VDIVPDZrmbk:
9453 case X86::VDIVPDZrmbkz:
9454 case X86::VDIVPDZrmk:
9455 case X86::VDIVPDZrmkz:
9456 case X86::VDIVPDZrr:
9457 case X86::VDIVPDZrrk:
9458 case X86::VDIVPDZrrkz:
9459 case X86::VDIVPSZ128rm:
9460 case X86::VDIVPSZ128rmb:
9461 case X86::VDIVPSZ128rmbk:
9462 case X86::VDIVPSZ128rmbkz:
9463 case X86::VDIVPSZ128rmk:
9464 case X86::VDIVPSZ128rmkz:
9465 case X86::VDIVPSZ128rr:
9466 case X86::VDIVPSZ128rrk:
9467 case X86::VDIVPSZ128rrkz:
9468 case X86::VDIVPSZ256rm:
9469 case X86::VDIVPSZ256rmb:
9470 case X86::VDIVPSZ256rmbk:
9471 case X86::VDIVPSZ256rmbkz:
9472 case X86::VDIVPSZ256rmk:
9473 case X86::VDIVPSZ256rmkz:
9474 case X86::VDIVPSZ256rr:
9475 case X86::VDIVPSZ256rrk:
9476 case X86::VDIVPSZ256rrkz:
9477 case X86::VDIVPSZrrb:
9478 case X86::VDIVPSZrrbk:
9479 case X86::VDIVPSZrrbkz:
9480 case X86::VDIVPSZrm:
9481 case X86::VDIVPSZrmb:
9482 case X86::VDIVPSZrmbk:
9483 case X86::VDIVPSZrmbkz:
9484 case X86::VDIVPSZrmk:
9485 case X86::VDIVPSZrmkz:
9486 case X86::VDIVPSZrr:
9487 case X86::VDIVPSZrrk:
9488 case X86::VDIVPSZrrkz:
9489 case X86::VDIVSDZrm:
9490 case X86::VDIVSDZrr:
9491 case X86::VDIVSDZrm_Int:
9492 case X86::VDIVSDZrm_Intk:
9493 case X86::VDIVSDZrm_Intkz:
9494 case X86::VDIVSDZrr_Int:
9495 case X86::VDIVSDZrr_Intk:
9496 case X86::VDIVSDZrr_Intkz:
9497 case X86::VDIVSDZrrb_Int:
9498 case X86::VDIVSDZrrb_Intk:
9499 case X86::VDIVSDZrrb_Intkz:
9500 case X86::VDIVSSZrm:
9501 case X86::VDIVSSZrr:
9502 case X86::VDIVSSZrm_Int:
9503 case X86::VDIVSSZrm_Intk:
9504 case X86::VDIVSSZrm_Intkz:
9505 case X86::VDIVSSZrr_Int:
9506 case X86::VDIVSSZrr_Intk:
9507 case X86::VDIVSSZrr_Intkz:
9508 case X86::VDIVSSZrrb_Int:
9509 case X86::VDIVSSZrrb_Intk:
9510 case X86::VDIVSSZrrb_Intkz:
9511 case X86::VSQRTPDZ128m:
9512 case X86::VSQRTPDZ128mb:
9513 case X86::VSQRTPDZ128mbk:
9514 case X86::VSQRTPDZ128mbkz:
9515 case X86::VSQRTPDZ128mk:
9516 case X86::VSQRTPDZ128mkz:
9517 case X86::VSQRTPDZ128r:
9518 case X86::VSQRTPDZ128rk:
9519 case X86::VSQRTPDZ128rkz:
9520 case X86::VSQRTPDZ256m:
9521 case X86::VSQRTPDZ256mb:
9522 case X86::VSQRTPDZ256mbk:
9523 case X86::VSQRTPDZ256mbkz:
9524 case X86::VSQRTPDZ256mk:
9525 case X86::VSQRTPDZ256mkz:
9526 case X86::VSQRTPDZ256r:
9527 case X86::VSQRTPDZ256rk:
9528 case X86::VSQRTPDZ256rkz:
9529 case X86::VSQRTPDZm:
9530 case X86::VSQRTPDZmb:
9531 case X86::VSQRTPDZmbk:
9532 case X86::VSQRTPDZmbkz:
9533 case X86::VSQRTPDZmk:
9534 case X86::VSQRTPDZmkz:
9535 case X86::VSQRTPDZr:
9536 case X86::VSQRTPDZrb:
9537 case X86::VSQRTPDZrbk:
9538 case X86::VSQRTPDZrbkz:
9539 case X86::VSQRTPDZrk:
9540 case X86::VSQRTPDZrkz:
9541 case X86::VSQRTPSZ128m:
9542 case X86::VSQRTPSZ128mb:
9543 case X86::VSQRTPSZ128mbk:
9544 case X86::VSQRTPSZ128mbkz:
9545 case X86::VSQRTPSZ128mk:
9546 case X86::VSQRTPSZ128mkz:
9547 case X86::VSQRTPSZ128r:
9548 case X86::VSQRTPSZ128rk:
9549 case X86::VSQRTPSZ128rkz:
9550 case X86::VSQRTPSZ256m:
9551 case X86::VSQRTPSZ256mb:
9552 case X86::VSQRTPSZ256mbk:
9553 case X86::VSQRTPSZ256mbkz:
9554 case X86::VSQRTPSZ256mk:
9555 case X86::VSQRTPSZ256mkz:
9556 case X86::VSQRTPSZ256r:
9557 case X86::VSQRTPSZ256rk:
9558 case X86::VSQRTPSZ256rkz:
9559 case X86::VSQRTPSZm:
9560 case X86::VSQRTPSZmb:
9561 case X86::VSQRTPSZmbk:
9562 case X86::VSQRTPSZmbkz:
9563 case X86::VSQRTPSZmk:
9564 case X86::VSQRTPSZmkz:
9565 case X86::VSQRTPSZr:
9566 case X86::VSQRTPSZrb:
9567 case X86::VSQRTPSZrbk:
9568 case X86::VSQRTPSZrbkz:
9569 case X86::VSQRTPSZrk:
9570 case X86::VSQRTPSZrkz:
9571 case X86::VSQRTSDZm:
9572 case X86::VSQRTSDZm_Int:
9573 case X86::VSQRTSDZm_Intk:
9574 case X86::VSQRTSDZm_Intkz:
9575 case X86::VSQRTSDZr:
9576 case X86::VSQRTSDZr_Int:
9577 case X86::VSQRTSDZr_Intk:
9578 case X86::VSQRTSDZr_Intkz:
9579 case X86::VSQRTSDZrb_Int:
9580 case X86::VSQRTSDZrb_Intk:
9581 case X86::VSQRTSDZrb_Intkz:
9582 case X86::VSQRTSSZm:
9583 case X86::VSQRTSSZm_Int:
9584 case X86::VSQRTSSZm_Intk:
9585 case X86::VSQRTSSZm_Intkz:
9586 case X86::VSQRTSSZr:
9587 case X86::VSQRTSSZr_Int:
9588 case X86::VSQRTSSZr_Intk:
9589 case X86::VSQRTSSZr_Intkz:
9590 case X86::VSQRTSSZrb_Int:
9591 case X86::VSQRTSSZrb_Intk:
9592 case X86::VSQRTSSZrb_Intkz:
9593
9594 case X86::VGATHERDPDYrm:
9595 case X86::VGATHERDPDZ128rm:
9596 case X86::VGATHERDPDZ256rm:
9597 case X86::VGATHERDPDZrm:
9598 case X86::VGATHERDPDrm:
9599 case X86::VGATHERDPSYrm:
9600 case X86::VGATHERDPSZ128rm:
9601 case X86::VGATHERDPSZ256rm:
9602 case X86::VGATHERDPSZrm:
9603 case X86::VGATHERDPSrm:
9604 case X86::VGATHERPF0DPDm:
9605 case X86::VGATHERPF0DPSm:
9606 case X86::VGATHERPF0QPDm:
9607 case X86::VGATHERPF0QPSm:
9608 case X86::VGATHERPF1DPDm:
9609 case X86::VGATHERPF1DPSm:
9610 case X86::VGATHERPF1QPDm:
9611 case X86::VGATHERPF1QPSm:
9612 case X86::VGATHERQPDYrm:
9613 case X86::VGATHERQPDZ128rm:
9614 case X86::VGATHERQPDZ256rm:
9615 case X86::VGATHERQPDZrm:
9616 case X86::VGATHERQPDrm:
9617 case X86::VGATHERQPSYrm:
9618 case X86::VGATHERQPSZ128rm:
9619 case X86::VGATHERQPSZ256rm:
9620 case X86::VGATHERQPSZrm:
9621 case X86::VGATHERQPSrm:
9622 case X86::VPGATHERDDYrm:
9623 case X86::VPGATHERDDZ128rm:
9624 case X86::VPGATHERDDZ256rm:
9625 case X86::VPGATHERDDZrm:
9626 case X86::VPGATHERDDrm:
9627 case X86::VPGATHERDQYrm:
9628 case X86::VPGATHERDQZ128rm:
9629 case X86::VPGATHERDQZ256rm:
9630 case X86::VPGATHERDQZrm:
9631 case X86::VPGATHERDQrm:
9632 case X86::VPGATHERQDYrm:
9633 case X86::VPGATHERQDZ128rm:
9634 case X86::VPGATHERQDZ256rm:
9635 case X86::VPGATHERQDZrm:
9636 case X86::VPGATHERQDrm:
9637 case X86::VPGATHERQQYrm:
9638 case X86::VPGATHERQQZ128rm:
9639 case X86::VPGATHERQQZ256rm:
9640 case X86::VPGATHERQQZrm:
9641 case X86::VPGATHERQQrm:
9642 case X86::VSCATTERDPDZ128mr:
9643 case X86::VSCATTERDPDZ256mr:
9644 case X86::VSCATTERDPDZmr:
9645 case X86::VSCATTERDPSZ128mr:
9646 case X86::VSCATTERDPSZ256mr:
9647 case X86::VSCATTERDPSZmr:
9648 case X86::VSCATTERPF0DPDm:
9649 case X86::VSCATTERPF0DPSm:
9650 case X86::VSCATTERPF0QPDm:
9651 case X86::VSCATTERPF0QPSm:
9652 case X86::VSCATTERPF1DPDm:
9653 case X86::VSCATTERPF1DPSm:
9654 case X86::VSCATTERPF1QPDm:
9655 case X86::VSCATTERPF1QPSm:
9656 case X86::VSCATTERQPDZ128mr:
9657 case X86::VSCATTERQPDZ256mr:
9658 case X86::VSCATTERQPDZmr:
9659 case X86::VSCATTERQPSZ128mr:
9660 case X86::VSCATTERQPSZ256mr:
9661 case X86::VSCATTERQPSZmr:
9662 case X86::VPSCATTERDDZ128mr:
9663 case X86::VPSCATTERDDZ256mr:
9664 case X86::VPSCATTERDDZmr:
9665 case X86::VPSCATTERDQZ128mr:
9666 case X86::VPSCATTERDQZ256mr:
9667 case X86::VPSCATTERDQZmr:
9668 case X86::VPSCATTERQDZ128mr:
9669 case X86::VPSCATTERQDZ256mr:
9670 case X86::VPSCATTERQDZmr:
9671 case X86::VPSCATTERQQZ128mr:
9672 case X86::VPSCATTERQQZ256mr:
9673 case X86::VPSCATTERQQZmr:
9674 return true;
9675 }
9676}
9677
9678bool X86InstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
9679                                         const MachineRegisterInfo *MRI,
9680 const MachineInstr &DefMI,
9681 unsigned DefIdx,
9682 const MachineInstr &UseMI,
9683 unsigned UseIdx) const {
9684 return isHighLatencyDef(DefMI.getOpcode());
9685}
9686
9687bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst,
9688                                           const MachineBasicBlock *MBB) const {
9689 assert(Inst.getNumExplicitOperands() == 3 && Inst.getNumExplicitDefs() == 1 &&
9690 Inst.getNumDefs() <= 2 && "Reassociation needs binary operators");
9691
9692 // Integer binary math/logic instructions have a third source operand:
9693 // the EFLAGS register. That operand must be both defined here and never
9694 // used; ie, it must be dead. If the EFLAGS operand is live, then we can
9695 // not change anything because rearranging the operands could affect other
9696 // instructions that depend on the exact status flags (zero, sign, etc.)
9697 // that are set by using these particular operands with this operation.
9698 const MachineOperand *FlagDef =
9699 Inst.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
9700 assert((Inst.getNumDefs() == 1 || FlagDef) && "Implicit def isn't flags?");
9701 if (FlagDef && !FlagDef->isDead())
9702 return false;
9703
9704  return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
9705}
9706
9707// TODO: There are many more machine instruction opcodes to match:
9708// 1. Other data types (integer, vectors)
9709// 2. Other math / logic operations (xor, or)
9710// 3. Other forms of the same operation (intrinsics and other variants)
9711bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
9712                                               bool Invert) const {
9713 if (Invert)
9714 return false;
9715 switch (Inst.getOpcode()) {
9716 CASE_ND(ADD8rr)
9717 CASE_ND(ADD16rr)
9718 CASE_ND(ADD32rr)
9719 CASE_ND(ADD64rr)
9720 CASE_ND(AND8rr)
9721 CASE_ND(AND16rr)
9722 CASE_ND(AND32rr)
9723 CASE_ND(AND64rr)
9724 CASE_ND(OR8rr)
9725 CASE_ND(OR16rr)
9726 CASE_ND(OR32rr)
9727 CASE_ND(OR64rr)
9728 CASE_ND(XOR8rr)
9729 CASE_ND(XOR16rr)
9730 CASE_ND(XOR32rr)
9731 CASE_ND(XOR64rr)
9732 CASE_ND(IMUL16rr)
9733 CASE_ND(IMUL32rr)
9734 CASE_ND(IMUL64rr)
9735 case X86::PANDrr:
9736 case X86::PORrr:
9737 case X86::PXORrr:
9738 case X86::ANDPDrr:
9739 case X86::ANDPSrr:
9740 case X86::ORPDrr:
9741 case X86::ORPSrr:
9742 case X86::XORPDrr:
9743 case X86::XORPSrr:
9744 case X86::PADDBrr:
9745 case X86::PADDWrr:
9746 case X86::PADDDrr:
9747 case X86::PADDQrr:
9748 case X86::PMULLWrr:
9749 case X86::PMULLDrr:
9750 case X86::PMAXSBrr:
9751 case X86::PMAXSDrr:
9752 case X86::PMAXSWrr:
9753 case X86::PMAXUBrr:
9754 case X86::PMAXUDrr:
9755 case X86::PMAXUWrr:
9756 case X86::PMINSBrr:
9757 case X86::PMINSDrr:
9758 case X86::PMINSWrr:
9759 case X86::PMINUBrr:
9760 case X86::PMINUDrr:
9761 case X86::PMINUWrr:
9762 case X86::VPANDrr:
9763 case X86::VPANDYrr:
9764 case X86::VPANDDZ128rr:
9765 case X86::VPANDDZ256rr:
9766 case X86::VPANDDZrr:
9767 case X86::VPANDQZ128rr:
9768 case X86::VPANDQZ256rr:
9769 case X86::VPANDQZrr:
9770 case X86::VPORrr:
9771 case X86::VPORYrr:
9772 case X86::VPORDZ128rr:
9773 case X86::VPORDZ256rr:
9774 case X86::VPORDZrr:
9775 case X86::VPORQZ128rr:
9776 case X86::VPORQZ256rr:
9777 case X86::VPORQZrr:
9778 case X86::VPXORrr:
9779 case X86::VPXORYrr:
9780 case X86::VPXORDZ128rr:
9781 case X86::VPXORDZ256rr:
9782 case X86::VPXORDZrr:
9783 case X86::VPXORQZ128rr:
9784 case X86::VPXORQZ256rr:
9785 case X86::VPXORQZrr:
9786 case X86::VANDPDrr:
9787 case X86::VANDPSrr:
9788 case X86::VANDPDYrr:
9789 case X86::VANDPSYrr:
9790 case X86::VANDPDZ128rr:
9791 case X86::VANDPSZ128rr:
9792 case X86::VANDPDZ256rr:
9793 case X86::VANDPSZ256rr:
9794 case X86::VANDPDZrr:
9795 case X86::VANDPSZrr:
9796 case X86::VORPDrr:
9797 case X86::VORPSrr:
9798 case X86::VORPDYrr:
9799 case X86::VORPSYrr:
9800 case X86::VORPDZ128rr:
9801 case X86::VORPSZ128rr:
9802 case X86::VORPDZ256rr:
9803 case X86::VORPSZ256rr:
9804 case X86::VORPDZrr:
9805 case X86::VORPSZrr:
9806 case X86::VXORPDrr:
9807 case X86::VXORPSrr:
9808 case X86::VXORPDYrr:
9809 case X86::VXORPSYrr:
9810 case X86::VXORPDZ128rr:
9811 case X86::VXORPSZ128rr:
9812 case X86::VXORPDZ256rr:
9813 case X86::VXORPSZ256rr:
9814 case X86::VXORPDZrr:
9815 case X86::VXORPSZrr:
9816 case X86::KADDBkk:
9817 case X86::KADDWkk:
9818 case X86::KADDDkk:
9819 case X86::KADDQkk:
9820 case X86::KANDBkk:
9821 case X86::KANDWkk:
9822 case X86::KANDDkk:
9823 case X86::KANDQkk:
9824 case X86::KORBkk:
9825 case X86::KORWkk:
9826 case X86::KORDkk:
9827 case X86::KORQkk:
9828 case X86::KXORBkk:
9829 case X86::KXORWkk:
9830 case X86::KXORDkk:
9831 case X86::KXORQkk:
9832 case X86::VPADDBrr:
9833 case X86::VPADDWrr:
9834 case X86::VPADDDrr:
9835 case X86::VPADDQrr:
9836 case X86::VPADDBYrr:
9837 case X86::VPADDWYrr:
9838 case X86::VPADDDYrr:
9839 case X86::VPADDQYrr:
9840 case X86::VPADDBZ128rr:
9841 case X86::VPADDWZ128rr:
9842 case X86::VPADDDZ128rr:
9843 case X86::VPADDQZ128rr:
9844 case X86::VPADDBZ256rr:
9845 case X86::VPADDWZ256rr:
9846 case X86::VPADDDZ256rr:
9847 case X86::VPADDQZ256rr:
9848 case X86::VPADDBZrr:
9849 case X86::VPADDWZrr:
9850 case X86::VPADDDZrr:
9851 case X86::VPADDQZrr:
9852 case X86::VPMULLWrr:
9853 case X86::VPMULLWYrr:
9854 case X86::VPMULLWZ128rr:
9855 case X86::VPMULLWZ256rr:
9856 case X86::VPMULLWZrr:
9857 case X86::VPMULLDrr:
9858 case X86::VPMULLDYrr:
9859 case X86::VPMULLDZ128rr:
9860 case X86::VPMULLDZ256rr:
9861 case X86::VPMULLDZrr:
9862 case X86::VPMULLQZ128rr:
9863 case X86::VPMULLQZ256rr:
9864 case X86::VPMULLQZrr:
9865 case X86::VPMAXSBrr:
9866 case X86::VPMAXSBYrr:
9867 case X86::VPMAXSBZ128rr:
9868 case X86::VPMAXSBZ256rr:
9869 case X86::VPMAXSBZrr:
9870 case X86::VPMAXSDrr:
9871 case X86::VPMAXSDYrr:
9872 case X86::VPMAXSDZ128rr:
9873 case X86::VPMAXSDZ256rr:
9874 case X86::VPMAXSDZrr:
9875 case X86::VPMAXSQZ128rr:
9876 case X86::VPMAXSQZ256rr:
9877 case X86::VPMAXSQZrr:
9878 case X86::VPMAXSWrr:
9879 case X86::VPMAXSWYrr:
9880 case X86::VPMAXSWZ128rr:
9881 case X86::VPMAXSWZ256rr:
9882 case X86::VPMAXSWZrr:
9883 case X86::VPMAXUBrr:
9884 case X86::VPMAXUBYrr:
9885 case X86::VPMAXUBZ128rr:
9886 case X86::VPMAXUBZ256rr:
9887 case X86::VPMAXUBZrr:
9888 case X86::VPMAXUDrr:
9889 case X86::VPMAXUDYrr:
9890 case X86::VPMAXUDZ128rr:
9891 case X86::VPMAXUDZ256rr:
9892 case X86::VPMAXUDZrr:
9893 case X86::VPMAXUQZ128rr:
9894 case X86::VPMAXUQZ256rr:
9895 case X86::VPMAXUQZrr:
9896 case X86::VPMAXUWrr:
9897 case X86::VPMAXUWYrr:
9898 case X86::VPMAXUWZ128rr:
9899 case X86::VPMAXUWZ256rr:
9900 case X86::VPMAXUWZrr:
9901 case X86::VPMINSBrr:
9902 case X86::VPMINSBYrr:
9903 case X86::VPMINSBZ128rr:
9904 case X86::VPMINSBZ256rr:
9905 case X86::VPMINSBZrr:
9906 case X86::VPMINSDrr:
9907 case X86::VPMINSDYrr:
9908 case X86::VPMINSDZ128rr:
9909 case X86::VPMINSDZ256rr:
9910 case X86::VPMINSDZrr:
9911 case X86::VPMINSQZ128rr:
9912 case X86::VPMINSQZ256rr:
9913 case X86::VPMINSQZrr:
9914 case X86::VPMINSWrr:
9915 case X86::VPMINSWYrr:
9916 case X86::VPMINSWZ128rr:
9917 case X86::VPMINSWZ256rr:
9918 case X86::VPMINSWZrr:
9919 case X86::VPMINUBrr:
9920 case X86::VPMINUBYrr:
9921 case X86::VPMINUBZ128rr:
9922 case X86::VPMINUBZ256rr:
9923 case X86::VPMINUBZrr:
9924 case X86::VPMINUDrr:
9925 case X86::VPMINUDYrr:
9926 case X86::VPMINUDZ128rr:
9927 case X86::VPMINUDZ256rr:
9928 case X86::VPMINUDZrr:
9929 case X86::VPMINUQZ128rr:
9930 case X86::VPMINUQZ256rr:
9931 case X86::VPMINUQZrr:
9932 case X86::VPMINUWrr:
9933 case X86::VPMINUWYrr:
9934 case X86::VPMINUWZ128rr:
9935 case X86::VPMINUWZ256rr:
9936 case X86::VPMINUWZrr:
9937 // Normal min/max instructions are not commutative because of NaN and signed
9938 // zero semantics, but these are. Thus, there's no need to check for global
9939 // relaxed math; the instructions themselves have the properties we need.
9940 case X86::MAXCPDrr:
9941 case X86::MAXCPSrr:
9942 case X86::MAXCSDrr:
9943 case X86::MAXCSSrr:
9944 case X86::MINCPDrr:
9945 case X86::MINCPSrr:
9946 case X86::MINCSDrr:
9947 case X86::MINCSSrr:
9948 case X86::VMAXCPDrr:
9949 case X86::VMAXCPSrr:
9950 case X86::VMAXCPDYrr:
9951 case X86::VMAXCPSYrr:
9952 case X86::VMAXCPDZ128rr:
9953 case X86::VMAXCPSZ128rr:
9954 case X86::VMAXCPDZ256rr:
9955 case X86::VMAXCPSZ256rr:
9956 case X86::VMAXCPDZrr:
9957 case X86::VMAXCPSZrr:
9958 case X86::VMAXCSDrr:
9959 case X86::VMAXCSSrr:
9960 case X86::VMAXCSDZrr:
9961 case X86::VMAXCSSZrr:
9962 case X86::VMINCPDrr:
9963 case X86::VMINCPSrr:
9964 case X86::VMINCPDYrr:
9965 case X86::VMINCPSYrr:
9966 case X86::VMINCPDZ128rr:
9967 case X86::VMINCPSZ128rr:
9968 case X86::VMINCPDZ256rr:
9969 case X86::VMINCPSZ256rr:
9970 case X86::VMINCPDZrr:
9971 case X86::VMINCPSZrr:
9972 case X86::VMINCSDrr:
9973 case X86::VMINCSSrr:
9974 case X86::VMINCSDZrr:
9975 case X86::VMINCSSZrr:
9976 case X86::VMAXCPHZ128rr:
9977 case X86::VMAXCPHZ256rr:
9978 case X86::VMAXCPHZrr:
9979 case X86::VMAXCSHZrr:
9980 case X86::VMINCPHZ128rr:
9981 case X86::VMINCPHZ256rr:
9982 case X86::VMINCPHZrr:
9983 case X86::VMINCSHZrr:
9984 return true;
9985 case X86::ADDPDrr:
9986 case X86::ADDPSrr:
9987 case X86::ADDSDrr:
9988 case X86::ADDSSrr:
9989 case X86::MULPDrr:
9990 case X86::MULPSrr:
9991 case X86::MULSDrr:
9992 case X86::MULSSrr:
9993 case X86::VADDPDrr:
9994 case X86::VADDPSrr:
9995 case X86::VADDPDYrr:
9996 case X86::VADDPSYrr:
9997 case X86::VADDPDZ128rr:
9998 case X86::VADDPSZ128rr:
9999 case X86::VADDPDZ256rr:
10000 case X86::VADDPSZ256rr:
10001 case X86::VADDPDZrr:
10002 case X86::VADDPSZrr:
10003 case X86::VADDSDrr:
10004 case X86::VADDSSrr:
10005 case X86::VADDSDZrr:
10006 case X86::VADDSSZrr:
10007 case X86::VMULPDrr:
10008 case X86::VMULPSrr:
10009 case X86::VMULPDYrr:
10010 case X86::VMULPSYrr:
10011 case X86::VMULPDZ128rr:
10012 case X86::VMULPSZ128rr:
10013 case X86::VMULPDZ256rr:
10014 case X86::VMULPSZ256rr:
10015 case X86::VMULPDZrr:
10016 case X86::VMULPSZrr:
10017 case X86::VMULSDrr:
10018 case X86::VMULSSrr:
10019 case X86::VMULSDZrr:
10020 case X86::VMULSSZrr:
10021 case X86::VADDPHZ128rr:
10022 case X86::VADDPHZ256rr:
10023 case X86::VADDPHZrr:
10024 case X86::VADDSHZrr:
10025 case X86::VMULPHZ128rr:
10026 case X86::VMULPHZ256rr:
10027 case X86::VMULPHZrr:
10028 case X86::VMULSHZrr:
10029    return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
10030           Inst.getFlag(MachineInstr::MIFlag::FmNsz);
10031  default:
10032 return false;
10033 }
10034}
10035
10036/// If \p DescribedReg overlaps with the MOVrr instruction's destination
10037/// register then, if possible, describe the value in terms of the source
10038/// register.
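/// For example, given $rdi = MOV64rr $rbx, describing $edi yields $ebx, and
/// describing $rdi itself yields $rbx.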
10039static std::optional<ParamLoadedValue>
10040describeMOVrrLoadedValue(const MachineInstr &MI, Register DescribedReg,
10041                         const TargetRegisterInfo *TRI) {
10042 Register DestReg = MI.getOperand(0).getReg();
10043 Register SrcReg = MI.getOperand(1).getReg();
10044
10045 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10046
10047 // If the described register is the destination, just return the source.
10048 if (DestReg == DescribedReg)
10049 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10050
10051 // If the described register is a sub-register of the destination register,
10052 // then pick out the source register's corresponding sub-register.
10053 if (unsigned SubRegIdx = TRI->getSubRegIndex(DestReg, DescribedReg)) {
10054 Register SrcSubReg = TRI->getSubReg(SrcReg, SubRegIdx);
10055 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
10056 }
10057
10058 // The remaining case to consider is when the described register is a
10059  // super-register of the destination register. MOV8rr and MOV16rr do not
10060 // write to any of the other bytes in the register, meaning that we'd have to
10061 // describe the value using a combination of the source register and the
10062 // non-overlapping bits in the described register, which is not currently
10063 // possible.
10064 if (MI.getOpcode() == X86::MOV8rr || MI.getOpcode() == X86::MOV16rr ||
10065 !TRI->isSuperRegister(DestReg, DescribedReg))
10066 return std::nullopt;
10067
10068 assert(MI.getOpcode() == X86::MOV32rr && "Unexpected super-register case");
10069 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10070}
10071
10072std::optional<ParamLoadedValue>
10073X86InstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const {
10074  const MachineOperand *Op = nullptr;
10075 DIExpression *Expr = nullptr;
10076
10077  const TargetRegisterInfo *TRI = &getRegisterInfo();
10078
10079 switch (MI.getOpcode()) {
10080 case X86::LEA32r:
10081 case X86::LEA64r:
10082 case X86::LEA64_32r: {
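    // The loaded value is rebuilt as base + scale * index + displacement.
    // Roughly, $rdi = LEA64r $rbx, 2, $rcx, 8, $noreg is described by the base
    // register $rbx with the expression
    // DW_OP_breg<rcx> 0, DW_OP_constu 2, DW_OP_mul, DW_OP_plus,
    // DW_OP_plus_uconst 8.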
10083 // We may need to describe a 64-bit parameter with a 32-bit LEA.
10084 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10085 return std::nullopt;
10086
10087    // Operand 4 could be a global address. For now we do not support
10088    // that situation.
10089 if (!MI.getOperand(4).isImm() || !MI.getOperand(2).isImm())
10090 return std::nullopt;
10091
10092 const MachineOperand &Op1 = MI.getOperand(1);
10093 const MachineOperand &Op2 = MI.getOperand(3);
10094 assert(Op2.isReg() &&
10095 (Op2.getReg() == X86::NoRegister || Op2.getReg().isPhysical()));
10096
10097 // Omit situations like:
10098 // %rsi = lea %rsi, 4, ...
10099 if ((Op1.isReg() && Op1.getReg() == MI.getOperand(0).getReg()) ||
10100 Op2.getReg() == MI.getOperand(0).getReg())
10101 return std::nullopt;
10102 else if ((Op1.isReg() && Op1.getReg() != X86::NoRegister &&
10103 TRI->regsOverlap(Op1.getReg(), MI.getOperand(0).getReg())) ||
10104 (Op2.getReg() != X86::NoRegister &&
10105 TRI->regsOverlap(Op2.getReg(), MI.getOperand(0).getReg())))
10106 return std::nullopt;
10107
10108 int64_t Coef = MI.getOperand(2).getImm();
10109 int64_t Offset = MI.getOperand(4).getImm();
10110    SmallVector<uint64_t, 8> Ops;
10111
10112 if ((Op1.isReg() && Op1.getReg() != X86::NoRegister)) {
10113 Op = &Op1;
10114 } else if (Op1.isFI())
10115 Op = &Op1;
10116
10117 if (Op && Op->isReg() && Op->getReg() == Op2.getReg() && Coef > 0) {
10118 Ops.push_back(dwarf::DW_OP_constu);
10119 Ops.push_back(Coef + 1);
10120 Ops.push_back(dwarf::DW_OP_mul);
10121 } else {
10122 if (Op && Op2.getReg() != X86::NoRegister) {
10123 int dwarfReg = TRI->getDwarfRegNum(Op2.getReg(), false);
10124 if (dwarfReg < 0)
10125 return std::nullopt;
10126 else if (dwarfReg < 32) {
10127 Ops.push_back(dwarf::DW_OP_breg0 + dwarfReg);
10128 Ops.push_back(0);
10129 } else {
10130 Ops.push_back(dwarf::DW_OP_bregx);
10131 Ops.push_back(dwarfReg);
10132 Ops.push_back(0);
10133 }
10134 } else if (!Op) {
10135 assert(Op2.getReg() != X86::NoRegister);
10136 Op = &Op2;
10137 }
10138
10139 if (Coef > 1) {
10140 assert(Op2.getReg() != X86::NoRegister);
10141 Ops.push_back(dwarf::DW_OP_constu);
10142 Ops.push_back(Coef);
10143 Ops.push_back(dwarf::DW_OP_mul);
10144 }
10145
10146 if (((Op1.isReg() && Op1.getReg() != X86::NoRegister) || Op1.isFI()) &&
10147 Op2.getReg() != X86::NoRegister) {
10148 Ops.push_back(dwarf::DW_OP_plus);
10149 }
10150 }
10151
10153 Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), Ops);
10154
10155 return ParamLoadedValue(*Op, Expr);
10156 }
10157 case X86::MOV8ri:
10158 case X86::MOV16ri:
10159 // TODO: Handle MOV8ri and MOV16ri.
10160 return std::nullopt;
10161 case X86::MOV32ri:
10162 case X86::MOV64ri:
10163 case X86::MOV64ri32:
10164 // MOV32ri may be used for producing zero-extended 32-bit immediates in
10165 // 64-bit parameters, so we need to consider super-registers.
10166 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10167 return std::nullopt;
10168 return ParamLoadedValue(MI.getOperand(1), Expr);
10169 case X86::MOV8rr:
10170 case X86::MOV16rr:
10171 case X86::MOV32rr:
10172 case X86::MOV64rr:
10173 return describeMOVrrLoadedValue(MI, Reg, TRI);
10174 case X86::XOR32rr: {
10175 // 64-bit parameters are zero-materialized using XOR32rr, so also consider
10176 // super-registers.
10177 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10178 return std::nullopt;
10179 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
10180      return ParamLoadedValue(MachineOperand::CreateImm(0), Expr);
10181    return std::nullopt;
10182 }
10183 case X86::MOVSX64rr32: {
10184 // We may need to describe the lower 32 bits of the MOVSX; for example, in
10185 // cases like this:
10186 //
10187 // $ebx = [...]
10188 // $rdi = MOVSX64rr32 $ebx
10189 // $esi = MOV32rr $edi
10190 if (!TRI->isSubRegisterEq(MI.getOperand(0).getReg(), Reg))
10191 return std::nullopt;
10192
10193 Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10194
10195 // If the described register is the destination register we need to
10196 // sign-extend the source register from 32 bits. The other case we handle
10197 // is when the described register is the 32-bit sub-register of the
10198    // destination register, in which case we just need to return the source
10199 // register.
10200 if (Reg == MI.getOperand(0).getReg())
10201 Expr = DIExpression::appendExt(Expr, 32, 64, true);
10202 else
10203 assert(X86MCRegisterClasses[X86::GR32RegClassID].contains(Reg) &&
10204 "Unhandled sub-register case for MOVSX64rr32");
10205
10206 return ParamLoadedValue(MI.getOperand(1), Expr);
10207 }
10208 default:
10209 assert(!MI.isMoveImmediate() && "Unexpected MoveImm instruction");
10210    return TargetInstrInfo::describeLoadedValue(MI, Reg);
10211  }
10212}
10213
10214/// This is an architecture-specific helper function of reassociateOps.
10215/// Set special operand attributes for new instructions after reassociation.
10216void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
10217                                         MachineInstr &OldMI2,
10218 MachineInstr &NewMI1,
10219 MachineInstr &NewMI2) const {
10220 // Integer instructions may define an implicit EFLAGS dest register operand.
10221 MachineOperand *OldFlagDef1 =
10222 OldMI1.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
10223 MachineOperand *OldFlagDef2 =
10224 OldMI2.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
10225
10226 assert(!OldFlagDef1 == !OldFlagDef2 &&
10227 "Unexpected instruction type for reassociation");
10228
10229 if (!OldFlagDef1 || !OldFlagDef2)
10230 return;
10231
10232 assert(OldFlagDef1->isDead() && OldFlagDef2->isDead() &&
10233 "Must have dead EFLAGS operand in reassociable instruction");
10234
10235 MachineOperand *NewFlagDef1 =
10236 NewMI1.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
10237 MachineOperand *NewFlagDef2 =
10238 NewMI2.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
10239
10240 assert(NewFlagDef1 && NewFlagDef2 &&
10241 "Unexpected operand in reassociable instruction");
10242
10243 // Mark the new EFLAGS operands as dead to be helpful to subsequent iterations
10244 // of this pass or other passes. The EFLAGS operands must be dead in these new
10245 // instructions because the EFLAGS operands in the original instructions must
10246 // be dead in order for reassociation to occur.
10247 NewFlagDef1->setIsDead();
10248 NewFlagDef2->setIsDead();
10249}
10250
10251std::pair<unsigned, unsigned>
10252X86InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
10253  return std::make_pair(TF, 0u);
10254}
10255
10256ArrayRef<std::pair<unsigned, const char *>>
10257X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
10258  using namespace X86II;
10259 static const std::pair<unsigned, const char *> TargetFlags[] = {
10260 {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"},
10261 {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"},
10262 {MO_GOT, "x86-got"},
10263 {MO_GOTOFF, "x86-gotoff"},
10264 {MO_GOTPCREL, "x86-gotpcrel"},
10265 {MO_GOTPCREL_NORELAX, "x86-gotpcrel-norelax"},
10266 {MO_PLT, "x86-plt"},
10267 {MO_TLSGD, "x86-tlsgd"},
10268 {MO_TLSLD, "x86-tlsld"},
10269 {MO_TLSLDM, "x86-tlsldm"},
10270 {MO_GOTTPOFF, "x86-gottpoff"},
10271 {MO_INDNTPOFF, "x86-indntpoff"},
10272 {MO_TPOFF, "x86-tpoff"},
10273 {MO_DTPOFF, "x86-dtpoff"},
10274 {MO_NTPOFF, "x86-ntpoff"},
10275 {MO_GOTNTPOFF, "x86-gotntpoff"},
10276 {MO_DLLIMPORT, "x86-dllimport"},
10277 {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"},
10278 {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
10279 {MO_TLVP, "x86-tlvp"},
10280 {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
10281 {MO_SECREL, "x86-secrel"},
10282 {MO_COFFSTUB, "x86-coffstub"}};
10283 return ArrayRef(TargetFlags);
10284}
10285
10286namespace {
10287/// Create Global Base Reg pass. This initializes the PIC
10288/// global base register for x86-32.
10289struct CGBR : public MachineFunctionPass {
10290 static char ID;
10291 CGBR() : MachineFunctionPass(ID) {}
10292
10293 bool runOnMachineFunction(MachineFunction &MF) override {
10294 const X86TargetMachine *TM =
10295 static_cast<const X86TargetMachine *>(&MF.getTarget());
10296 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
10297
10298 // Only emit a global base reg in PIC mode.
10299 if (!TM->isPositionIndependent())
10300 return false;
10301
10302    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
10303    Register GlobalBaseReg = X86FI->getGlobalBaseReg();
10304
10305 // If we didn't need a GlobalBaseReg, don't insert code.
10306 if (GlobalBaseReg == 0)
10307 return false;
10308
10309 // Insert the set of GlobalBaseReg into the first MBB of the function
10310 MachineBasicBlock &FirstMBB = MF.front();
10311    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
10312    DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
10313    MachineRegisterInfo &RegInfo = MF.getRegInfo();
10314    const X86InstrInfo *TII = STI.getInstrInfo();
10315
10316 Register PC;
10317 if (STI.isPICStyleGOT())
10318 PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
10319 else
10320 PC = GlobalBaseReg;
10321
10322 if (STI.is64Bit()) {
10323 if (TM->getCodeModel() == CodeModel::Large) {
10324 // In the large code model, we are aiming for this code, though the
10325 // register allocation may vary:
10326 // leaq .LN$pb(%rip), %rax
10327 // movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx
10328 // addq %rcx, %rax
10329 // RAX now holds address of _GLOBAL_OFFSET_TABLE_.
10330 Register PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
10331 Register GOTReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
10332 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg)
10333 .addReg(X86::RIP)
10334 .addImm(0)
10335 .addReg(0)
10336            .addSym(MF.getPICBaseSymbol())
10337            .addReg(0);
10338 std::prev(MBBI)->setPreInstrSymbol(MF, MF.getPICBaseSymbol());
10339 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOV64ri), GOTReg)
10340 .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
10341                               X86II::MO_PIC_BASE_OFFSET);
10342        BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD64rr), PC)
10343 .addReg(PBReg, RegState::Kill)
10344 .addReg(GOTReg, RegState::Kill);
10345 } else {
10346 // In other code models, use a RIP-relative LEA to materialize the
10347 // GOT.
10348 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PC)
10349 .addReg(X86::RIP)
10350 .addImm(0)
10351 .addReg(0)
10352 .addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
10353 .addReg(0);
10354 }
10355 } else {
10356 // Operand of MovePCtoStack is completely ignored by asm printer. It's
10357 // only used in JIT code emission as displacement to pc.
10358 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
10359
10360 // If we're using vanilla 'GOT' PIC style, we should use relative
10361 // addressing not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
10362 if (STI.isPICStyleGOT()) {
10363 // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel],
10364 // %some_register
10365 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
10366 .addReg(PC)
10367 .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
10368                               X86II::MO_GOT_ABSOLUTE_ADDRESS);
10369      }
10370 }
10371
10372 return true;
10373 }
10374
10375 StringRef getPassName() const override {
10376 return "X86 PIC Global Base Reg Initialization";
10377 }
10378
10379 void getAnalysisUsage(AnalysisUsage &AU) const override {
10380 AU.setPreservesCFG();
10381    MachineFunctionPass::getAnalysisUsage(AU);
10382  }
10383};
10384} // namespace
10385
10386char CGBR::ID = 0;
10387FunctionPass *llvm::createX86GlobalBaseRegPass() { return new CGBR(); }
10388
10389namespace {
10390struct LDTLSCleanup : public MachineFunctionPass {
10391 static char ID;
10392 LDTLSCleanup() : MachineFunctionPass(ID) {}
10393
10394 bool runOnMachineFunction(MachineFunction &MF) override {
10395 if (skipFunction(MF.getFunction()))
10396 return false;
10397
10398    X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
10399    if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
10400      // No point folding accesses if there aren't at least two.
10401 return false;
10402 }
10403
10404    MachineDominatorTree *DT =
10405        &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
10406 return VisitNode(DT->getRootNode(), 0);
10407 }
10408
10409 // Visit the dominator subtree rooted at Node in pre-order.
10410 // If TLSBaseAddrReg is non-null, then use that to replace any
10411 // TLS_base_addr instructions. Otherwise, create the register
10412 // when the first such instruction is seen, and then use it
10413 // as we encounter more instructions.
10414 bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
10415 MachineBasicBlock *BB = Node->getBlock();
10416 bool Changed = false;
10417
10418 // Traverse the current block.
10419 for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
10420 ++I) {
10421 switch (I->getOpcode()) {
10422 case X86::TLS_base_addr32:
10423 case X86::TLS_base_addr64:
10424 if (TLSBaseAddrReg)
10425 I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
10426 else
10427 I = SetRegister(*I, &TLSBaseAddrReg);
10428 Changed = true;
10429 break;
10430 default:
10431 break;
10432 }
10433 }
10434
10435 // Visit the children of this block in the dominator tree.
10436 for (auto &I : *Node) {
10437 Changed |= VisitNode(I, TLSBaseAddrReg);
10438 }
10439
10440 return Changed;
10441 }
10442
10443 // Replace the TLS_base_addr instruction I with a copy from
10444 // TLSBaseAddrReg, returning the new instruction.
10445 MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr &I,
10446 unsigned TLSBaseAddrReg) {
10447 MachineFunction *MF = I.getParent()->getParent();
10448 const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
10449 const bool is64Bit = STI.is64Bit();
10450 const X86InstrInfo *TII = STI.getInstrInfo();
10451
10452 // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
10453    MachineInstr *Copy =
10454        BuildMI(*I.getParent(), I, I.getDebugLoc(),
10455 TII->get(TargetOpcode::COPY), is64Bit ? X86::RAX : X86::EAX)
10456 .addReg(TLSBaseAddrReg);
10457
10458 // Erase the TLS_base_addr instruction.
10459 I.eraseFromParent();
10460
10461 return Copy;
10462 }
10463
10464 // Create a virtual register in *TLSBaseAddrReg, and populate it by
10465 // inserting a copy instruction after I. Returns the new instruction.
10466 MachineInstr *SetRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) {
10467 MachineFunction *MF = I.getParent()->getParent();
10468 const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
10469 const bool is64Bit = STI.is64Bit();
10470 const X86InstrInfo *TII = STI.getInstrInfo();
10471
10472 // Create a virtual register for the TLS base address.
10473    MachineRegisterInfo &RegInfo = MF->getRegInfo();
10474    *TLSBaseAddrReg = RegInfo.createVirtualRegister(
10475 is64Bit ? &X86::GR64RegClass : &X86::GR32RegClass);
10476
10477 // Insert a copy from RAX/EAX to TLSBaseAddrReg.
10478 MachineInstr *Next = I.getNextNode();
10479 MachineInstr *Copy = BuildMI(*I.getParent(), Next, I.getDebugLoc(),
10480 TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
10481 .addReg(is64Bit ? X86::RAX : X86::EAX);
10482
10483 return Copy;
10484 }
10485
10486 StringRef getPassName() const override {
10487 return "Local Dynamic TLS Access Clean-up";
10488 }
10489
10490 void getAnalysisUsage(AnalysisUsage &AU) const override {
10491 AU.setPreservesCFG();
10492    AU.addRequired<MachineDominatorTreeWrapperPass>();
10493    MachineFunctionPass::getAnalysisUsage(AU);
10494  }
10495};
10496} // namespace
10497
10498char LDTLSCleanup::ID = 0;
10499FunctionPass *llvm::createCleanupLocalDynamicTLSPass() {
10500  return new LDTLSCleanup();
10501}
10502
10503/// Constants defining how certain sequences should be outlined.
10504///
10505/// \p MachineOutlinerDefault implies that the function is called with a call
10506/// instruction, and a return must be emitted for the outlined function frame.
10507///
10508/// That is,
10509///
10510/// I1 OUTLINED_FUNCTION:
10511/// I2 --> call OUTLINED_FUNCTION I1
10512/// I3 I2
10513/// I3
10514/// ret
10515///
10516/// * Call construction overhead: 1 (call instruction)
10517/// * Frame construction overhead: 1 (return instruction)
10518///
10519/// \p MachineOutlinerTailCall implies that the function is being tail called.
10520/// A jump is emitted instead of a call, and the return is already present in
10521/// the outlined sequence. That is,
10522///
10523/// I1 OUTLINED_FUNCTION:
10524/// I2 --> jmp OUTLINED_FUNCTION I1
10525/// ret I2
10526/// ret
10527///
10528/// * Call construction overhead: 1 (jump instruction)
10529/// * Frame construction overhead: 0 (don't need to return)
10530///
10531enum MachineOutlinerClass { MachineOutlinerDefault, MachineOutlinerTailCall };
10532
10533std::optional<std::unique_ptr<outliner::OutlinedFunction>>
10534X86InstrInfo::getOutliningCandidateInfo(
10535    const MachineModuleInfo &MMI,
10536 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
10537 unsigned MinRepeats) const {
10538 unsigned SequenceSize = 0;
10539 for (auto &MI : RepeatedSequenceLocs[0]) {
10540 // FIXME: x86 doesn't implement getInstSizeInBytes, so
10541 // we can't tell the cost. Just assume each instruction
10542 // is one byte.
10543 if (MI.isDebugInstr() || MI.isKill())
10544 continue;
10545 SequenceSize += 1;
10546 }
10547
10548 // We check to see if CFI Instructions are present, and if they are
10549 // we find the number of CFI Instructions in the candidates.
10550 unsigned CFICount = 0;
10551 for (auto &I : RepeatedSequenceLocs[0]) {
10552 if (I.isCFIInstruction())
10553 CFICount++;
10554 }
10555
10556 // We compare the number of found CFI Instructions to the number of CFI
10557 // instructions in the parent function for each candidate. We must check this
10558 // since if we outline one of the CFI instructions in a function, we have to
10559 // outline them all for correctness. If we do not, the address offsets will be
10560 // incorrect between the two sections of the program.
10561 for (outliner::Candidate &C : RepeatedSequenceLocs) {
10562 std::vector<MCCFIInstruction> CFIInstructions =
10563 C.getMF()->getFrameInstructions();
10564
10565 if (CFICount > 0 && CFICount != CFIInstructions.size())
10566 return std::nullopt;
10567 }
10568
10569 // FIXME: Use real size in bytes for call and ret instructions.
10570 if (RepeatedSequenceLocs[0].back().isTerminator()) {
10571 for (outliner::Candidate &C : RepeatedSequenceLocs)
10572 C.setCallInfo(MachineOutlinerTailCall, 1);
10573
10574 return std::make_unique<outliner::OutlinedFunction>(
10575 RepeatedSequenceLocs, SequenceSize,
10576 0, // Number of bytes to emit frame.
10577 MachineOutlinerTailCall // Type of frame.
10578 );
10579 }
10580
10581 if (CFICount > 0)
10582 return std::nullopt;
10583
10584 for (outliner::Candidate &C : RepeatedSequenceLocs)
10585 C.setCallInfo(MachineOutlinerDefault, 1);
10586
10587 return std::make_unique<outliner::OutlinedFunction>(
10588 RepeatedSequenceLocs, SequenceSize, 1, MachineOutlinerDefault);
10589}
10590
10591bool X86InstrInfo::isFunctionSafeToOutlineFrom(
10592    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
10593 const Function &F = MF.getFunction();
10594
10595 // Does the function use a red zone? If it does, then we can't risk messing
10596 // with the stack.
10597 if (Subtarget.getFrameLowering()->has128ByteRedZone(MF)) {
10598 // It could have a red zone. If it does, then we don't want to touch it.
10599    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
10600    if (!X86FI || X86FI->getUsesRedZone())
10601 return false;
10602 }
10603
10604 // If we *don't* want to outline from things that could potentially be deduped
10605 // then return false.
10606 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
10607 return false;
10608
10609 // This function is viable for outlining, so return true.
10610 return true;
10611}
10612
10613outliner::InstrType
10614X86InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10615                                   MachineBasicBlock::iterator &MIT,
10616                                   unsigned Flags) const {
10617 MachineInstr &MI = *MIT;
10618
10619 // Is this a terminator for a basic block?
10620 if (MI.isTerminator())
10621 // TargetInstrInfo::getOutliningType has already filtered out anything
10622 // that would break this, so we can allow it here.
10623    return outliner::InstrType::Legal;
10624
10625 // Don't outline anything that modifies or reads from the stack pointer.
10626 //
10627 // FIXME: There are instructions which are being manually built without
10628 // explicit uses/defs so we also have to check the MCInstrDesc. We should be
10629 // able to remove the extra checks once those are fixed up. For example,
10630 // sometimes we might get something like %rax = POP64r 1. This won't be
10631 // caught by modifiesRegister or readsRegister even though the instruction
10632 // really ought to be formed so that modifiesRegister/readsRegister would
10633 // catch it.
10634 if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) ||
10635 MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) ||
10636 MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP))
10637    return outliner::InstrType::Illegal;
10638
10639 // Outlined calls change the instruction pointer, so don't read from it.
10640 if (MI.readsRegister(X86::RIP, &RI) ||
10641 MI.getDesc().hasImplicitUseOfPhysReg(X86::RIP) ||
10642 MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP))
10643    return outliner::InstrType::Illegal;
10644
10645 // Don't outline CFI instructions.
10646 if (MI.isCFIInstruction())
10647    return outliner::InstrType::Illegal;
10648
10649  return outliner::InstrType::Legal;
10650}
10651
10652void X86InstrInfo::buildOutlinedFrame(
10653    MachineBasicBlock &MBB, MachineFunction &MF,
10654    const outliner::OutlinedFunction &OF) const {
10655 // If we're a tail call, we already have a return, so don't do anything.
10656  if (OF.FrameConstructionID == MachineOutlinerTailCall)
10657    return;
10658
10659 // We're a normal call, so our sequence doesn't have a return instruction.
10660 // Add it in.
10661 MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RET64));
10662 MBB.insert(MBB.end(), retq);
10663}
10664
10665MachineBasicBlock::iterator X86InstrInfo::insertOutlinedCall(
10666    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
10667    MachineFunction &MF, outliner::Candidate &C) const {
10668  // Is it a tail call?
10669 if (C.CallConstructionID == MachineOutlinerTailCall) {
10670 // Yes, just insert a JMP.
10671 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(X86::TAILJMPd64))
10672 .addGlobalAddress(M.getNamedValue(MF.getName())));
10673 } else {
10674 // No, insert a call.
10675 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(X86::CALL64pcrel32))
10676 .addGlobalAddress(M.getNamedValue(MF.getName())));
10677 }
10678
10679 return It;
10680}
10681
10682void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10683                                      MachineBasicBlock::iterator Iter,
10684                                      DebugLoc &DL,
10685 bool AllowSideEffects) const {
10686 const MachineFunction &MF = *MBB.getParent();
10687 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
10688  const TargetRegisterInfo &TRI = getRegisterInfo();
10689
10690 if (ST.hasMMX() && X86::VR64RegClass.contains(Reg))
10691 // FIXME: Should we ignore MMX registers?
10692 return;
10693
10694 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10695 // Convert register to the 32-bit version. Both 'movl' and 'xorl' clear the
10696 // upper bits of a 64-bit register automagically.
10697 Reg = getX86SubSuperRegister(Reg, 32);
10698
10699 if (!AllowSideEffects)
10700 // XOR affects flags, so use a MOV instead.
10701 BuildMI(MBB, Iter, DL, get(X86::MOV32ri), Reg).addImm(0);
10702 else
10703 BuildMI(MBB, Iter, DL, get(X86::XOR32rr), Reg)
10704 .addReg(Reg, RegState::Undef)
10705 .addReg(Reg, RegState::Undef);
10706 } else if (X86::VR128RegClass.contains(Reg)) {
10707 // XMM#
10708 if (!ST.hasSSE1())
10709 return;
10710
10711 // PXOR is safe to use because it doesn't affect flags.
10712 BuildMI(MBB, Iter, DL, get(X86::PXORrr), Reg)
10713 .addReg(Reg, RegState::Undef)
10714 .addReg(Reg, RegState::Undef);
10715 } else if (X86::VR256RegClass.contains(Reg)) {
10716 // YMM#
10717 if (!ST.hasAVX())
10718 return;
10719
10720 // VPXOR is safe to use because it doesn't affect flags.
10721 BuildMI(MBB, Iter, DL, get(X86::VPXORrr), Reg)
10722 .addReg(Reg, RegState::Undef)
10723 .addReg(Reg, RegState::Undef);
10724 } else if (X86::VR512RegClass.contains(Reg)) {
10725 // ZMM#
10726 if (!ST.hasAVX512())
10727 return;
10728
10729 // VPXORY is safe to use because it doesn't affect flags.
10730 BuildMI(MBB, Iter, DL, get(X86::VPXORYrr), Reg)
10731 .addReg(Reg, RegState::Undef)
10732 .addReg(Reg, RegState::Undef);
10733 } else if (X86::VK1RegClass.contains(Reg) || X86::VK2RegClass.contains(Reg) ||
10734 X86::VK4RegClass.contains(Reg) || X86::VK8RegClass.contains(Reg) ||
10735 X86::VK16RegClass.contains(Reg)) {
10736 if (!ST.hasVLX())
10737 return;
10738
10739 // KXOR is safe to use because it doesn't affect flags.
10740 unsigned Op = ST.hasBWI() ? X86::KXORQkk : X86::KXORWkk;
10741 BuildMI(MBB, Iter, DL, get(Op), Reg)
10742 .addReg(Reg, RegState::Undef)
10743 .addReg(Reg, RegState::Undef);
10744 }
10745}
10746
10747bool X86InstrInfo::getMachineCombinerPatterns(
10748    MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
10749 bool DoRegPressureReduce) const {
10750 unsigned Opc = Root.getOpcode();
10751 switch (Opc) {
10752 case X86::VPDPWSSDrr:
10753 case X86::VPDPWSSDrm:
10754 case X86::VPDPWSSDYrr:
10755 case X86::VPDPWSSDYrm: {
10756 if (!Subtarget.hasFastDPWSSD()) {
10757      Patterns.push_back(X86MachineCombinerPattern::DPWSSD);
10758      return true;
10759 }
10760 break;
10761 }
10762 case X86::VPDPWSSDZ128r:
10763 case X86::VPDPWSSDZ128m:
10764 case X86::VPDPWSSDZ256r:
10765 case X86::VPDPWSSDZ256m:
10766 case X86::VPDPWSSDZr:
10767 case X86::VPDPWSSDZm: {
10768 if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
10769 Patterns.push_back(X86MachineCombinerPattern::DPWSSD);
10770 return true;
10771 }
10772 break;
10773 }
10774 }
10775 return TargetInstrInfo::getMachineCombinerPatterns(Root,
10776 Patterns, DoRegPressureReduce);
10777}
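// Minimal usage sketch (an assumption, not upstream code): the MachineCombiner
// pass first collects candidate patterns and then asks the target to
// materialize each alternative, roughly:
//
//   SmallVector<unsigned, 4> Patterns;
//   if (TII->getMachineCombinerPatterns(Root, Patterns,
//                                       /*DoRegPressureReduce=*/false))
//     for (unsigned P : Patterns)
//       TII->genAlternativeCodeSequence(Root, P, InsInstrs, DelInstrs,
//                                       InstrIdxForVirtReg);
//
// For the VPDPWSSD opcodes above this yields the DPWSSD pattern, handled by
// genAlternativeDpCodeSequence() below.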
10778
10779static void
10780genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
10781 SmallVectorImpl<MachineInstr *> &InsInstrs,
10782 SmallVectorImpl<MachineInstr *> &DelInstrs,
10783 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
10784 MachineFunction *MF = Root.getMF();
10785 MachineRegisterInfo &RegInfo = MF->getRegInfo();
10786
10787 unsigned Opc = Root.getOpcode();
10788 unsigned AddOpc = 0;
10789 unsigned MaddOpc = 0;
10790 switch (Opc) {
10791 default:
10792 assert(false && "It should not reach here");
10793 break;
10794 // vpdpwssd xmm2,xmm3,xmm1
10795 // -->
10796 // vpmaddwd xmm3,xmm3,xmm1
10797 // vpaddd xmm2,xmm2,xmm3
10798 case X86::VPDPWSSDrr:
10799 MaddOpc = X86::VPMADDWDrr;
10800 AddOpc = X86::VPADDDrr;
10801 break;
10802 case X86::VPDPWSSDrm:
10803 MaddOpc = X86::VPMADDWDrm;
10804 AddOpc = X86::VPADDDrr;
10805 break;
10806 case X86::VPDPWSSDZ128r:
10807 MaddOpc = X86::VPMADDWDZ128rr;
10808 AddOpc = X86::VPADDDZ128rr;
10809 break;
10810 case X86::VPDPWSSDZ128m:
10811 MaddOpc = X86::VPMADDWDZ128rm;
10812 AddOpc = X86::VPADDDZ128rr;
10813 break;
10814 // vpdpwssd ymm2,ymm3,ymm1
10815 // -->
10816 // vpmaddwd ymm3,ymm3,ymm1
10817 // vpaddd ymm2,ymm2,ymm3
10818 case X86::VPDPWSSDYrr:
10819 MaddOpc = X86::VPMADDWDYrr;
10820 AddOpc = X86::VPADDDYrr;
10821 break;
10822 case X86::VPDPWSSDYrm:
10823 MaddOpc = X86::VPMADDWDYrm;
10824 AddOpc = X86::VPADDDYrr;
10825 break;
10826 case X86::VPDPWSSDZ256r:
10827 MaddOpc = X86::VPMADDWDZ256rr;
10828 AddOpc = X86::VPADDDZ256rr;
10829 break;
10830 case X86::VPDPWSSDZ256m:
10831 MaddOpc = X86::VPMADDWDZ256rm;
10832 AddOpc = X86::VPADDDZ256rr;
10833 break;
10834 // vpdpwssd zmm2,zmm3,zmm1
10835 // -->
10836 // vpmaddwd zmm3,zmm3,zmm1
10837 // vpaddd zmm2,zmm2,zmm3
10838 case X86::VPDPWSSDZr:
10839 MaddOpc = X86::VPMADDWDZrr;
10840 AddOpc = X86::VPADDDZrr;
10841 break;
10842 case X86::VPDPWSSDZm:
10843 MaddOpc = X86::VPMADDWDZrm;
10844 AddOpc = X86::VPADDDZrr;
10845 break;
10846 }
10847 // Create vpmaddwd.
10848 const TargetRegisterClass *RC =
10849 RegInfo.getRegClass(Root.getOperand(0).getReg());
10850 Register NewReg = RegInfo.createVirtualRegister(RC);
10851 MachineInstr *Madd = Root.getMF()->CloneMachineInstr(&Root);
10852 Madd->setDesc(TII.get(MaddOpc));
10853 Madd->untieRegOperand(1);
10854 Madd->removeOperand(1);
10855 Madd->getOperand(0).setReg(NewReg);
10856 InstrIdxForVirtReg.insert(std::make_pair(NewReg, 0));
10857 // Create vpaddd.
10858 Register DstReg = Root.getOperand(0).getReg();
10859 bool IsKill = Root.getOperand(1).isKill();
10860 MachineInstr *Add =
10861 BuildMI(*MF, MIMetadata(Root), TII.get(AddOpc), DstReg)
10862 .addReg(Root.getOperand(1).getReg(), getKillRegState(IsKill))
10863 .addReg(Madd->getOperand(0).getReg(), getKillRegState(true));
10864 InsInstrs.push_back(Madd);
10865 InsInstrs.push_back(Add);
10866 DelInstrs.push_back(&Root);
10867}
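// Illustrative sketch, not part of the upstream file: for a root such as
//   %dst = VPDPWSSDrr %acc, %a, %b
// the helper above queues the equivalent two-instruction sequence
//   %tmp = VPMADDWDrr %a, %b
//   %dst = VPADDDrr %acc, killed %tmp
// and records %tmp in InstrIdxForVirtReg so the combiner can account for the
// newly created virtual register (register names here are illustrative).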
10868
10869void X86InstrInfo::genAlternativeCodeSequence(
10870 MachineInstr &Root, unsigned Pattern,
10871 SmallVectorImpl<MachineInstr *> &InsInstrs,
10872 SmallVectorImpl<MachineInstr *> &DelInstrs,
10873 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
10874 switch (Pattern) {
10875 default:
10876 // Reassociate instructions.
10877 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
10878 DelInstrs, InstrIdxForVirtReg);
10879 return;
10880 case X86MachineCombinerPattern::DPWSSD:
10881 genAlternativeDpCodeSequence(Root, *this, InsInstrs, DelInstrs,
10882 InstrIdxForVirtReg);
10883 return;
10884 }
10885}
10886
10887// See also: X86DAGToDAGISel::SelectInlineAsmMemoryOperand().
10888void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
10889 int FI) const {
10890 X86AddressMode M;
10891 M.BaseType = X86AddressMode::FrameIndexBase;
10892 M.Base.FrameIndex = FI;
10893 M.getFullAddress(Ops);
10894}
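// Illustrative sketch, not part of the upstream file: for a frame index FI the
// five address operands appended to Ops correspond to
//   base = frame-index FI, scale = 1, index = %noreg, disp = 0, segment = %noreg
// i.e. the base/scale/index/disp/segment layout X86 uses for memory references.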
10895
10896#define GET_INSTRINFO_HELPERS
10897#include "X86GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerDefault
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
Definition: InlineInfo.cpp:108
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static bool Expand2AddrUndef(MachineInstrBuilder &MIB, const MCInstrDesc &Desc)
Expand a single-def pseudo instruction to a two-addr instruction with two undef reads of the register...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool IsDead
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Provides some synthesis utilities to produce sequences of values.
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
#define FROM_TO(FROM, TO)
static bool is64Bit(const char *name)
#define GET_EGPR_IF_ENABLED(OPC)
static bool isLEA(unsigned Opcode)
static void addOperands(MachineInstrBuilder &MIB, ArrayRef< MachineOperand > MOs, int PtrOffset=0)
static std::optional< ParamLoadedValue > describeMOVrrLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetRegisterInfo *TRI)
If DescribedReg overlaps with the MOVrr instruction's destination register then, if possible,...
static cl::opt< unsigned > PartialRegUpdateClearance("partial-reg-update-clearance", cl::desc("Clearance between two register writes " "for inserting XOR to avoid partial " "register update"), cl::init(64), cl::Hidden)
static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI)
static bool isConvertibleLEA(MachineInstr *MI)
static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, const X86Subtarget &Subtarget)
static bool isAMXOpcode(unsigned Opc)
static int getJumpTableIndexFromReg(const MachineRegisterInfo &MRI, Register Reg)
static void updateOperandRegConstraints(MachineFunction &MF, MachineInstr &NewMI, const TargetInstrInfo &TII)
static bool findRedundantFlagInstr(MachineInstr &CmpInstr, MachineInstr &CmpValDefInstr, const MachineRegisterInfo *MRI, MachineInstr **AndInstr, const TargetRegisterInfo *TRI, bool &NoSignFlag, bool &ClearsOverflowFlag)
static int getJumpTableIndexFromAddr(const MachineInstr &MI)
static bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth, unsigned NewWidth, unsigned *pNewMask=nullptr)
static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, bool MinusOne)
static unsigned getNewOpcFromTable(ArrayRef< X86TableEntry > Table, unsigned Opc)
static unsigned getStoreRegOpcode(Register SrcReg, const TargetRegisterClass *RC, bool IsStackAligned, const X86Subtarget &STI)
#define FOLD_BROADCAST(SIZE)
static cl::opt< unsigned > UndefRegClearance("undef-reg-clearance", cl::desc("How many idle instructions we would like before " "certain undef register reads"), cl::init(128), cl::Hidden)
#define CASE_BCAST_TYPE_OPC(TYPE, OP16, OP32, OP64)
static bool isTruncatedShiftCountForLEA(unsigned ShAmt)
Check whether the given shift count is appropriate can be represented by a LEA instruction.
static cl::opt< bool > ReMatPICStubLoad("remat-pic-stub-load", cl::desc("Re-materialize load from stub in PIC mode"), cl::init(false), cl::Hidden)
static SmallVector< MachineMemOperand *, 2 > extractLoadMMOs(ArrayRef< MachineMemOperand * > MMOs, MachineFunction &MF)
static MachineInstr * fuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI, const TargetInstrInfo &TII)
static void printFailMsgforFold(const MachineInstr &MI, unsigned Idx)
static bool canConvert2Copy(unsigned Opc)
static cl::opt< bool > NoFusing("disable-spill-fusing", cl::desc("Disable fusing of spill code into instructions"), cl::Hidden)
static bool expandNOVLXStore(MachineInstrBuilder &MIB, const TargetRegisterInfo *TRI, const MCInstrDesc &StoreDesc, const MCInstrDesc &ExtractDesc, unsigned SubIdx)
static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes)
static bool Expand2AddrKreg(MachineInstrBuilder &MIB, const MCInstrDesc &Desc, Register Reg)
Expand a single-def pseudo instruction to a two-addr instruction with two k0 reads.
#define VPERM_CASES_BROADCAST(Suffix)
static X86::CondCode isUseDefConvertible(const MachineInstr &MI)
Check whether the use can be converted to remove a comparison against zero.
static bool expandSHXDROT(MachineInstrBuilder &MIB, const MCInstrDesc &Desc)
static unsigned getLoadRegOpcode(Register DestReg, const TargetRegisterClass *RC, bool IsStackAligned, const X86Subtarget &STI)
static void expandLoadStackGuard(MachineInstrBuilder &MIB, const TargetInstrInfo &TII)
static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum, bool ForLoadFold=false)
static MachineInstr * makeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI)
#define GET_ND_IF_ENABLED(OPC)
static bool hasPartialRegUpdate(unsigned Opcode, const X86Subtarget &Subtarget, bool ForLoadFold=false)
Return true for all instructions that only update the first 32 or 64-bits of the destination register...
static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, const X86Subtarget &Subtarget)
static const uint16_t * lookupAVX512(unsigned opcode, unsigned domain, ArrayRef< uint16_t[4]> Table)
static unsigned getLoadStoreRegOpcode(Register Reg, const TargetRegisterClass *RC, bool IsStackAligned, const X86Subtarget &STI, bool Load)
#define VPERM_CASES(Suffix)
#define FROM_TO_SIZE(A, B, S)
static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2)
static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes)
static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag, bool &ClearsOverflowFlag)
Check whether the definition can be converted to remove a comparison against zero.
static bool isHReg(unsigned Reg)
Test if the given register is a physical h register.
static MachineInstr * fuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI, const TargetInstrInfo &TII, int PtrOffset=0)
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
static unsigned getCommutedVPERMV3Opcode(unsigned Opcode)
static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII)
static MachineBasicBlock * getFallThroughMBB(MachineBasicBlock *MBB, MachineBasicBlock *TBB)
static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, const MachineInstr &UserMI, const MachineFunction &MF)
Check if LoadMI is a partial register load that we can't fold into MI because the latter uses content...
static unsigned getLoadStoreOpcodeForFP16(bool Load, const X86Subtarget &STI)
static cl::opt< bool > PrintFailedFusing("print-failed-fuse-candidates", cl::desc("Print instructions that the allocator wants to" " fuse, but the X86 backend currently can't"), cl::Hidden)
static bool expandNOVLXLoad(MachineInstrBuilder &MIB, const TargetRegisterInfo *TRI, const MCInstrDesc &LoadDesc, const MCInstrDesc &BroadcastDesc, unsigned SubIdx)
#define CASE_ND(OP)
static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1, unsigned SrcOpIdx2)
This determines which of three possible cases of a three source commute the source indexes correspond...
static bool isX87Reg(unsigned Reg)
Return true if the Reg is X87 register.
static void genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg)
static unsigned getTruncatedShiftCount(const MachineInstr &MI, unsigned ShiftAmtOperandIdx)
Check whether the shift count for a machine operand is non-zero.
static SmallVector< MachineMemOperand *, 2 > extractStoreMMOs(ArrayRef< MachineMemOperand * > MMOs, MachineFunction &MF)
static unsigned getBroadcastOpcode(const X86FoldTableEntry *I, const TargetRegisterClass *RC, const X86Subtarget &STI)
static unsigned convertALUrr2ALUri(unsigned Opc)
Convert an ALUrr opcode to corresponding ALUri opcode.
static bool regIsPICBase(Register BaseReg, const MachineRegisterInfo &MRI)
Return true if register is PIC base; i.e.g defined by X86::MOVPC32r.
static bool isCommutableVPERMV3Instruction(unsigned Opcode)
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:206
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:209
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:219
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:157
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:703
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:679
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:688
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:677
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:678
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:697
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:687
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:681
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:684
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:685
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:680
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:682
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:701
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:689
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:699
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:686
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:683
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
DWARF expression.
static void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)
Append Ops with operations to apply the Offset.
static DIExpression * appendExt(const DIExpression *Expr, unsigned FromSize, unsigned ToSize, bool Signed)
Append a zero- or sign-extension to Expr.
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Base class for the actual dominator tree node.
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:707
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:369
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:52
void stepForward(const MachineInstr &MI, SmallVectorImpl< std::pair< MCPhysReg, const MachineOperand * > > &Clobbers)
Simulates liveness when stepping forward over an instruction(bundle).
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
const Segment * getSegmentContaining(SlotIndex Idx) const
Return the segment that contains the specified index, or null if there is none.
Definition: LiveInterval.h:408
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:759
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:598
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
void setOpcode(unsigned Op)
Definition: MCInst.h:198
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
unsigned getOpcode() const
Return the opcode number for this descriptor.
Definition: MCInstrDesc.h:230
unsigned char NumDefs
Definition: MCInstrDesc.h:207
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition: MCInstrDesc.h:85
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1543
Set of metadata that should be preserved when using BuildMI().
SimpleValueType SimpleTy
unsigned pred_size() const
MachineInstrBundleIterator< const MachineInstr > const_iterator
reverse_iterator rend()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
reverse_iterator rbegin()
@ LQR_Dead
Register is known to be fully dead.
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
This class is a data container for one entry in a MachineConstantPool.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
union llvm::MachineConstantPoolEntry::@204 Val
The constant itself.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImplicit=false)
CreateMachineInstr - Allocate a new MachineInstr.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineBasicBlock & front() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDisp(const MachineOperand &Disp, int64_t off, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
mop_iterator operands_begin()
Definition: MachineInstr.h:685
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
bool isImplicitDef() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
void dropDebugNumber()
Drop any variable location debugging information associated with this instruction.
Definition: MachineInstr.h:556
void setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just prior to the instruction itself.
void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:397
bool isSafeToMove(bool &SawStore) const
Return true if it is safe to move this instruction.
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:578
void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:572
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
unsigned getNumExplicitDefs() const
Returns the number of non-implicit definitions.
void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:821
void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:806
bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:404
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:499
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
unsigned getNumDefs() const
Returns the total number of definitions.
Definition: MachineInstr.h:644
void setDebugLoc(DebugLoc DL)
Replace current source information with new such.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImplicit(bool Val=true)
void setImm(int64_t immVal)
int64_t getImm() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
bool isJTI() const
isJTI - Tests if this is a MO_JumpTableIndex operand.
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
static MachineOperand CreateImm(int64_t Val)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateCPI(unsigned Idx, int Offset, unsigned TargetFlags=0)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
static MachineOperand CreateFI(int Idx)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
const TargetRegisterInfo * getTargetRegisterInfo() const
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:65
SlotIndex getBaseIndex() const
Returns the base index for associated with this index.
Definition: SlotIndexes.h:224
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:237
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction.
virtual bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const
Return true when \P Inst has reassociable operands in the same \P MBB.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum,...
bool isPositionIndependent() const
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TypeSize getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Provide an instruction scheduling machine model to CodeGen passes.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
LLVM Value Representation.
Definition: Value.h:74
bool has128ByteRedZone(const MachineFunction &MF) const
Return true if the function has a redzone (accessible bytes past the frame of the top of stack functi...
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
void getFrameIndexOperands(SmallVectorImpl< MachineOperand > &Ops, int FI) const override
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
Check if there exists an earlier instruction that operates on the same source operands and sets eflag...
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Overrides the isSchedulingBoundary from Codegen/TargetInstrInfo.cpp to make it capable of identifying...
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const override
Given a machine instruction descriptor, returns the register class constraint for OpNum,...
void replaceBranchWithTailCall(MachineBasicBlock &MBB, SmallVectorImpl< MachineOperand > &Cond, const MachineInstr &TailCall) const override
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned getGlobalBaseReg(MachineFunction *MF) const
getGlobalBaseReg - Return a virtual register initialized with the the global base register value.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex=nullptr) const override
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
Returns true iff the routine could find two commutable operands in the given machine instruction.
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override
static bool isDataInvariantLoad(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value l...
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned CommuteOpIdx1, unsigned CommuteOpIdx2) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
const X86RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: X86InstrInfo.h:211
bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override
bool hasCommutePreference(MachineInstr &MI, bool &Commute) const override
Returns true if we have preference on the operands order in MI, the commute decision is returned in C...
bool hasLiveCondCodeDef(MachineInstr &MI) const
True if MI has a condition code def, e.g.
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
bool canMakeTailCallConditional(SmallVectorImpl< MachineOperand > &Cond, const MachineInstr &TailCall) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
convertToThreeAddress - This method must be implemented by targets that set the M_CONVERTIBLE_TO_3_AD...
X86InstrInfo(X86Subtarget &STI)
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool expandPostRAPseudo(MachineInstr &MI) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
MCInst getNop() const override
Return the noop instruction to use for a noop.
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
This is a used by the pre-regalloc scheduler to determine (in conjunction with areLoadsFromSameBasePt...
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl< MachineInstr * > &NewMIs) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
Fold a load or store of the specified stack slot into the specified machine instruction for the speci...
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds potential patterns, this function generates the instructions ...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
isStoreToStackSlotPostFE - Check for post-frame ptr elimination stack locations as well.
bool isUnconditionalTailCall(const MachineInstr &MI) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
std::optional< std::unique_ptr< outliner::OutlinedFunction > > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs, unsigned MinRepeats) const override
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
isLoadFromStackSlotPostFE - Check for post-frame ptr elimination stack locations as well.
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool setExecutionDomainCustom(MachineInstr &MI, unsigned Domain) const
int getSPAdjust(const MachineInstr &MI) const override
getSPAdjust - This returns the stack pointer adjustment made by this instruction.
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
int getJumpTableIndex(const MachineInstr &MI) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, MachineInstr &NewMI1, MachineInstr &NewMI2) const override
This is an architecture-specific helper function of reassociateOps.
std::pair< uint16_t, uint16_t > getExecutionDomain(const MachineInstr &MI) const override
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
isCoalescableExtInstr - Return true if the instruction is a "coalescable" extension instruction.
void loadStoreTileReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Opc, Register Reg, int FrameIdx, bool isKill=false) const
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, unsigned LEAOpcode, bool AllowSP, Register &NewSrc, bool &isKill, MachineOperand &ImplicitOp, LiveVariables *LV, LiveIntervals *LIS) const
Given an operand within a MachineInstr, insert preceding code to put it into the right format for a p...
bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, TargetInstrInfo::MachineBranchPredicate &MBP, bool AllowModify=false) const override
static bool isDataInvariant(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value o...
unsigned getUndefRegClearance(const MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
Inform the BreakFalseDeps pass how many idle instructions we would like before certain undef register...
void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e....
Definition: X86InstrInfo.h:215
bool hasHighOperandLatency(const TargetSchedModel &SchedModel, const MachineRegisterInfo *MRI, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override
uint16_t getExecutionDomainCustom(const MachineInstr &MI) const
bool isHighLatencyDef(int opc) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const override
foldImmediate - 'Reg' is known to be defined by a move immediate instruction, try to fold the immedia...
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
unsigned getFMA3OpcodeToCommuteOperands(const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2, const X86InstrFMA3Group &FMA3Group) const
Returns an adjusted FMA opcode that must be used in FMA instruction that performs the same computatio...
bool preservesZeroValueInReg(const MachineInstr *MI, const Register NullValueReg, const TargetRegisterInfo *TRI) const override
unsigned getPartialRegUpdateClearance(const MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
Inform the BreakFalseDeps pass how many idle instructions we would like before a partial register upd...
MachineInstr * optimizeLoadInstr(MachineInstr &MI, const MachineRegisterInfo *MRI, Register &FoldAsLoadDefReg, MachineInstr *&DefMI) const override
Try to remove the load by folding it to a register operand at the use.
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
unsigned getNumLocalDynamicTLSAccesses() const
bool canRealignStack(const MachineFunction &MF) const override
bool isPICStyleGOT() const
Definition: X86Subtarget.h:328
bool canUseCMOV() const
Definition: X86Subtarget.h:192
bool isTargetWin64() const
Definition: X86Subtarget.h:324
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool hasAVX512() const
Definition: X86Subtarget.h:201
bool hasSSE41() const
Definition: X86Subtarget.h:197
bool hasSSE2() const
Definition: X86Subtarget.h:194
const X86RegisterInfo * getRegisterInfo() const override
Definition: X86Subtarget.h:132
bool hasAVX() const
Definition: X86Subtarget.h:199
const X86FrameLowering * getFrameLowering() const override
Definition: X86Subtarget.h:124
bool hasAVX2() const
Definition: X86Subtarget.h:200
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1613
@ OPERAND_MEMORY
Definition: MCInstrDesc.h:62
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ X86
Windows x64, Windows Itanium (IA-64)
Reg
All possible values of the reg field in the ModR/M byte.
bool isKMergeMasked(uint64_t TSFlags)
Definition: X86BaseInfo.h:1319
@ EVEX
EVEX - Specifies that this instruction use EVEX form which provides syntax support up to 32 512-bit r...
Definition: X86BaseInfo.h:825
@ SSEDomainShift
Execution domain for SSE instructions.
Definition: X86BaseInfo.h:811
@ MO_GOT_ABSOLUTE_ADDRESS
MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a relocation of: SYMBOL_LABEL + [.
Definition: X86BaseInfo.h:367
@ MO_INDNTPOFF
MO_INDNTPOFF - On a symbol operand this indicates that the immediate is the absolute address of the G...
Definition: X86BaseInfo.h:432
@ MO_GOTNTPOFF
MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry w...
Definition: X86BaseInfo.h:456
@ MO_GOTTPOFF
MO_GOTTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry wi...
Definition: X86BaseInfo.h:425
@ MO_PIC_BASE_OFFSET
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of th...
Definition: X86BaseInfo.h:371
@ MO_GOTPCREL
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:387
bool canUseApxExtendedReg(const MCInstrDesc &Desc)
Definition: X86BaseInfo.h:1260
bool isPseudo(uint64_t TSFlags)
Definition: X86BaseInfo.h:887
bool isKMasked(uint64_t TSFlags)
Definition: X86BaseInfo.h:1314
int getMemoryOperandNo(uint64_t TSFlags)
Definition: X86BaseInfo.h:1011
unsigned getOperandBias(const MCInstrDesc &Desc)
Compute whether all of the def operands are repeated in the uses and therefore should be skipped.
Definition: X86BaseInfo.h:968
CondCode getCondFromBranch(const MachineInstr &MI)
CondCode getCondFromCFCMov(const MachineInstr &MI)
@ AddrScaleAmt
Definition: X86BaseInfo.h:30
@ AddrSegmentReg
Definition: X86BaseInfo.h:34
@ AddrIndexReg
Definition: X86BaseInfo.h:31
@ AddrNumOperands
Definition: X86BaseInfo.h:36
@ LAST_VALID_COND
Definition: X86BaseInfo.h:94
CondCode getCondFromMI(const MachineInstr &MI)
Return the condition code of the instruction.
int getFirstAddrOperandIdx(const MachineInstr &MI)
Return the index of the instruction's first address operand, if it has a memory reference,...
unsigned getSwappedVCMPImm(unsigned Imm)
Get the VCMP immediate if the opcodes are swapped.
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g.
unsigned getSwappedVPCOMImm(unsigned Imm)
Get the VPCOM immediate if the opcodes are swapped.
bool isX87Instruction(MachineInstr &MI)
Check if the instruction is X87 instruction.
unsigned getNonNDVariant(unsigned Opc)
unsigned getVPCMPImmForCond(ISD::CondCode CC)
Get the VPCMP immediate for the given condition.
std::pair< CondCode, bool > getX86ConditionCode(CmpInst::Predicate Predicate)
Return a pair of condition code for the given predicate and whether the instruction operands should b...
CondCode getCondFromSETCC(const MachineInstr &MI)
unsigned getSwappedVPCMPImm(unsigned Imm)
Get the VPCMP immediate if the opcodes are swapped.
CondCode getCondFromCCMP(const MachineInstr &MI)
int getCCMPCondFlagsFromCondCode(CondCode CC)
int getCondSrcNoFromDesc(const MCInstrDesc &MCID)
Return the source operand # for condition code by MCID.
const Constant * getConstantFromPool(const MachineInstr &MI, unsigned OpNo)
Find any constant pool entry associated with a specific instruction operand.
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false, bool HasNDD=false)
Return a cmov opcode for the given register size in bytes, and operand type.
unsigned getNFVariant(unsigned Opc)
unsigned getVectorRegisterWidth(const MCOperandInfo &Info)
Get the width of the vector register operand.
CondCode getCondFromCMov(const MachineInstr &MI)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
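A small usage sketch of the range-based wrapper; the predicate and MI are illustrative:
  bool NoRegOperands = llvm::all_of(
      MI.operands(), [](const MachineOperand &MO) { return !MO.isReg(); });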
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
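The builder is normally chained with add* calls; a minimal sketch that materializes an immediate into a register (the opcode, insertion point and register are placeholders):
  BuildMI(MBB, InsertPt, DL, TII->get(X86::MOV32ri), DestReg)
      .addImm(42);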
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
static bool isMem(const MachineInstr &MI, unsigned Op)
Definition: X86InstrInfo.h:170
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
FunctionPass * createX86GlobalBaseRegPass()
This pass initializes a global base register for PIC on x86-32.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
unsigned getDeadRegState(bool B)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
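A typical pairing with BuildMI folds a stack slot into the memory operands of a reload, roughly as the spill/reload helpers in this file do; FrameIdx is an assumed frame index:
  addFrameReference(
      BuildMI(MBB, InsertPt, DL, TII->get(X86::MOV32rm), DestReg), FrameIdx);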
MaybeAlign getAlign(const Function &F, unsigned Index)
FunctionPass * createCleanupLocalDynamicTLSPass()
This pass combines multiple accesses to local-dynamic TLS variables so that the TLS base address for ...
const X86FoldTableEntry * lookupBroadcastFoldTable(unsigned RegOp, unsigned OpNum)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
const X86InstrFMA3Group * getFMA3Group(unsigned Opcode, uint64_t TSFlags)
Returns a reference to a group of FMA3 opcodes to where the given Opcode is included.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset],...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
const X86FoldTableEntry * lookupTwoAddrFoldTable(unsigned RegOp)
static const MachineInstrBuilder & addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2)
addRegReg - This function is used to add a memory reference of the form: [Reg + Reg].
static const MachineInstrBuilder & addOffset(const MachineInstrBuilder &MIB, int Offset)
unsigned getUndefRegState(bool B)
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
unsigned getDefRegState(bool B)
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1978
@ Add
Sum of integers.
unsigned getKillRegState(bool B)
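The get*RegState helpers listed above translate booleans into RegState flags for addReg; a small sketch, with MIB, SrcReg and the two booleans assumed to be in scope:
  MIB.addReg(SrcReg, getKillRegState(SrcIsKill) | getUndefRegState(SrcIsUndef));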
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
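A quick worked example of the alignment helpers (values are illustrative):
  uint64_t Padded = alignTo(10, Align(8)); // 16
  bool Ok = isAligned(Align(16), Padded);  // true, since 16 is 16-byte aligned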
@ DPWSSD
Definition: X86InstrInfo.h:32
const X86FoldTableEntry * lookupUnfoldTable(unsigned MemOp)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool matchBroadcastSize(const X86FoldTableEntry &Entry, unsigned BroadcastBits)
const X86FoldTableEntry * lookupFoldTable(unsigned RegOp, unsigned OpNum)
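A sketch of consulting the fold tables when deciding whether a register-form instruction can be folded with a memory operand; only the lookup is shown, the fields carried by X86FoldTableEntry are not spelled out here, and MI/OpNum are assumed inputs:
  // OpNum is the operand that would be replaced by a memory reference.
  if (const X86FoldTableEntry *Entry = lookupFoldTable(MI.getOpcode(), OpNum)) {
    // Entry describes the memory-form opcode and its folding constraints.
  }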
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
Extended Value Type.
Definition: ValueTypes.h:35
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
Used to describe an addressing mode similar to ExtAddrMode in CodeGenPrepare.
This represents a simple continuous liveness interval for a value.
Definition: LiveInterval.h:162
std::vector< MachineInstr * > Kills
Kills - List of MachineInstruction's which are the last use of this virtual register (kill it) in the...
Definition: LiveVariables.h:88
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
X86AddressMode - This struct holds a generalized full x86 address mode.
This class is used to group {132, 213, 231} forms of FMA opcodes together.
unsigned get213Opcode() const
Returns the 213 form of FMA opcode.
unsigned get231Opcode() const
Returns the 231 form of FMA opcode.
bool isIntrinsic() const
Returns true iff the group of FMA opcodes holds intrinsic opcodes.
unsigned get132Opcode() const
Returns the 132 form of FMA opcode.
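A sketch of querying an FMA3 group when commuting operands; MI is assumed to be an FMA3 instruction whose opcode belongs to a group:
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (const X86InstrFMA3Group *Group = getFMA3Group(MI.getOpcode(), TSFlags)) {
    // Commuting operands selects between the 132/213/231 forms.
    unsigned Opc132 = Group->get132Opcode();
    unsigned Opc213 = Group->get213Opcode();
    unsigned Opc231 = Group->get231Opcode();
    (void)Opc132; (void)Opc213; (void)Opc231;
  }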
An individual sequence of instructions to be replaced with a call to an outlined function.
The information necessary to create an outlined function for some class of candidate.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.