1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/Statistic.h"
26#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicsLoongArch.h"
30#include "llvm/Support/Debug.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "loongarch-isel-lowering"
38
39STATISTIC(NumTailCalls, "Number of tail calls");
40
41static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(false));
44
45LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
48
49 MVT GRLenVT = Subtarget.getGRLenVT();
50
51 // Set up the register classes.
52
53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72 // Set operations for LA32 and LA64.
73
75 MVT::i1, Promote);
76
83
86 GRLenVT, Custom);
87
89
94
97
101
102 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
103 // we know which of sll and revb.2h is faster.
106
107 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109 // and i32 could still be byte-swapped relatively cheaply.
111
117
120
121 // Set operations for LA64 only.
122
123 if (Subtarget.is64Bit()) {
141
145 Custom);
147 }
148
149 // Set operations for LA32 only.
150
151 if (!Subtarget.is64Bit()) {
157 }
158
160
161 static const ISD::CondCode FPCCToExpand[] = {
164
165 // Set operations for 'F' feature.
166
167 if (Subtarget.hasBasicF()) {
168 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
169 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
170 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
171
187
188 if (Subtarget.is64Bit())
190
191 if (!Subtarget.hasBasicD()) {
193 if (Subtarget.is64Bit()) {
196 }
197 }
198 }
199
200 // Set operations for 'D' feature.
201
202 if (Subtarget.hasBasicD()) {
203 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
204 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
205 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
206 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
207 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
208
224
225 if (Subtarget.is64Bit())
227 }
228
229 // Set operations for 'LSX' feature.
230
231 if (Subtarget.hasExtLSX()) {
233 // Expand all truncating stores and extending loads.
234 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
235 setTruncStoreAction(VT, InnerVT, Expand);
238 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
239 }
240 // By default everything must be expanded. Then we will selectively turn
241 // on ones that can be effectively codegen'd.
242 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
244 }
245
246 for (MVT VT : LSXVTs) {
250
254
258 }
259 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
262 Legal);
264 VT, Legal);
271 Expand);
272 }
273 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
275 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
277 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
280 }
281 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
289 VT, Expand);
290 }
292 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
293 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
294 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
295 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
296 }
297
298 // Set operations for 'LASX' feature.
299
300 if (Subtarget.hasExtLASX()) {
301 for (MVT VT : LASXVTs) {
305
310
314 }
315 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
318 Legal);
320 VT, Legal);
327 Expand);
328 }
329 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
331 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
333 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
336 }
337 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
345 VT, Expand);
346 }
347 }
348
349 // Set DAG combine for LA32 and LA64.
350
355
356 // Set DAG combine for 'LSX' feature.
357
358 if (Subtarget.hasExtLSX())
360
361 // Compute derived properties from the register classes.
363
365
368
370
372
373 // Function alignments.
375 // Set preferred alignments.
379
380 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
381 if (Subtarget.hasLAMCAS())
383}
384
385bool LoongArchTargetLowering::isOffsetFoldingLegal(
386 const GlobalAddressSDNode *GA) const {
387 // In order to maximise the opportunity for common subexpression elimination,
388 // keep a separate ADD node for the global address offset instead of folding
389 // it in the global address node. Later peephole optimisations may choose to
390 // fold it back in when profitable.
391 return false;
392}
393
394SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
395 SelectionDAG &DAG) const {
396 switch (Op.getOpcode()) {
397 case ISD::ATOMIC_FENCE:
398 return lowerATOMIC_FENCE(Op, DAG);
399 case ISD::EH_DWARF_CFA:
400 return lowerEH_DWARF_CFA(Op, DAG);
401 case ISD::GlobalAddress:
402 return lowerGlobalAddress(Op, DAG);
403 case ISD::GlobalTLSAddress:
404 return lowerGlobalTLSAddress(Op, DAG);
405 case ISD::INTRINSIC_WO_CHAIN:
406 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
407 case ISD::INTRINSIC_W_CHAIN:
408 return lowerINTRINSIC_W_CHAIN(Op, DAG);
409 case ISD::INTRINSIC_VOID:
410 return lowerINTRINSIC_VOID(Op, DAG);
411 case ISD::BlockAddress:
412 return lowerBlockAddress(Op, DAG);
413 case ISD::JumpTable:
414 return lowerJumpTable(Op, DAG);
415 case ISD::SHL_PARTS:
416 return lowerShiftLeftParts(Op, DAG);
417 case ISD::SRA_PARTS:
418 return lowerShiftRightParts(Op, DAG, true);
419 case ISD::SRL_PARTS:
420 return lowerShiftRightParts(Op, DAG, false);
421 case ISD::ConstantPool:
422 return lowerConstantPool(Op, DAG);
423 case ISD::FP_TO_SINT:
424 return lowerFP_TO_SINT(Op, DAG);
425 case ISD::BITCAST:
426 return lowerBITCAST(Op, DAG);
427 case ISD::UINT_TO_FP:
428 return lowerUINT_TO_FP(Op, DAG);
429 case ISD::SINT_TO_FP:
430 return lowerSINT_TO_FP(Op, DAG);
431 case ISD::VASTART:
432 return lowerVASTART(Op, DAG);
433 case ISD::FRAMEADDR:
434 return lowerFRAMEADDR(Op, DAG);
435 case ISD::RETURNADDR:
436 return lowerRETURNADDR(Op, DAG);
437 case ISD::WRITE_REGISTER:
438 return lowerWRITE_REGISTER(Op, DAG);
439 case ISD::INSERT_VECTOR_ELT:
440 return lowerINSERT_VECTOR_ELT(Op, DAG);
441 case ISD::EXTRACT_VECTOR_ELT:
442 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
443 case ISD::BUILD_VECTOR:
444 return lowerBUILD_VECTOR(Op, DAG);
445 case ISD::VECTOR_SHUFFLE:
446 return lowerVECTOR_SHUFFLE(Op, DAG);
447 case ISD::BITREVERSE:
448 return lowerBITREVERSE(Op, DAG);
449 }
450 return SDValue();
451}
452
453SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
454 SelectionDAG &DAG) const {
455 EVT ResTy = Op->getValueType(0);
456 SDValue Src = Op->getOperand(0);
457 SDLoc DL(Op);
458
459 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
460 unsigned int OrigEltNum = ResTy.getVectorNumElements();
461 unsigned int NewEltNum = NewVT.getVectorNumElements();
462
463 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
464
466 for (unsigned int i = 0; i < NewEltNum; i++) {
467 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
468 DAG.getConstant(i, DL, MVT::i64));
469 SDValue RevOp = DAG.getNode((ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
472 DL, MVT::i64, Op);
473 Ops.push_back(RevOp);
474 }
475 SDValue Res =
476 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
477
478 switch (ResTy.getSimpleVT().SimpleTy) {
479 default:
480 return SDValue();
481 case MVT::v16i8:
482 case MVT::v32i8:
483 return Res;
484 case MVT::v8i16:
485 case MVT::v16i16:
486 case MVT::v4i32:
487 case MVT::v8i32: {
489 for (unsigned int i = 0; i < NewEltNum; i++)
490 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
491 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
492 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
493 }
494 }
495}
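// Editor's illustration (not part of the upstream source): for a v8i16
// input, NewVT is MVT::v2i64, OrigEltNum is 8 and NewEltNum is 2, so the
// shuffle mask built above is <3, 2, 1, 0, 7, 6, 5, 4>. Bit-reversing each
// i64 element reverses both the bits and the order of the four i16 elements
// it contains; the shuffle then restores the element order, leaving every
// i16 element individually bit-reversed.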
496
497/// Determine whether a range fits a regular pattern of values.
498/// This function accounts for the possibility of jumping over the End iterator.
499template <typename ValType>
500static bool
501fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
502 unsigned CheckStride,
503 typename SmallVectorImpl<ValType>::const_iterator End,
504 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
505 auto &I = Begin;
506
507 while (I != End) {
508 if (*I != -1 && *I != ExpectedIndex)
509 return false;
510 ExpectedIndex += ExpectedIndexStride;
511
512 // Incrementing past End is undefined behaviour so we must increment one
513 // step at a time and check for End at each step.
514 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
515 ; // Empty loop body.
516 }
517 return true;
518}
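// Editor's illustration (not part of the upstream source): the callers below
// typically scan interleaved mask positions. For the v4i32 mask <0, 4, 2, 6>,
//   fitsRegularPattern<int>(Mask.begin(), /*CheckStride=*/2, Mask.end(),
//                           /*ExpectedIndex=*/0, /*ExpectedIndexStride=*/2)
// visits positions 0 and 2 and accepts <0, 2>, while the same call starting
// at Mask.begin() + 1 with ExpectedIndex = Mask.size() (4 here) accepts the
// odd positions <4, 6>; this is the VPACKEV form handled further down.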
519
520/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
521///
522/// VREPLVEI performs vector broadcast based on an element specified by an
523/// integer immediate, with its mask being similar to:
524/// <x, x, x, ...>
525/// where x is any valid index.
526///
527/// When undef's appear in the mask they are treated as if they were whatever
528/// value is necessary in order to fit the above form.
529static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
530 MVT VT, SDValue V1, SDValue V2,
531 SelectionDAG &DAG) {
532 int SplatIndex = -1;
533 for (const auto &M : Mask) {
534 if (M != -1) {
535 SplatIndex = M;
536 break;
537 }
538 }
539
540 if (SplatIndex == -1)
541 return DAG.getUNDEF(VT);
542
543 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
544 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
545 APInt Imm(64, SplatIndex);
546 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
547 DAG.getConstant(Imm, DL, MVT::i64));
548 }
549
550 return SDValue();
551}
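// Editor's illustration (not part of the upstream source): for a v4i32
// single-input shuffle with mask <2, -1, 2, 2>, SplatIndex is 2, the whole
// mask fits the constant pattern (stride 0), and the result is a VREPLVEI
// node with immediate 2, broadcasting element 2 of V1 to all four lanes.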
552
553/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
554///
555/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
556/// elements according to a <4 x i2> constant (encoded as an integer immediate).
557///
558/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
559/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
560/// When undef's appear they are treated as if they were whatever value is
561/// necessary in order to fit the above forms.
562///
563/// For example:
564/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
565/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
566/// i32 7, i32 6, i32 5, i32 4>
567/// is lowered to:
568/// (VSHUF4I_H $v0, $v1, 27)
569/// where the 27 comes from:
570/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
571static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
572 MVT VT, SDValue V1, SDValue V2,
573 SelectionDAG &DAG) {
574
575 // When the size is less than 4, lower-cost instructions may be used.
576 if (Mask.size() < 4)
577 return SDValue();
578
579 int SubMask[4] = {-1, -1, -1, -1};
580 for (unsigned i = 0; i < 4; ++i) {
581 for (unsigned j = i; j < Mask.size(); j += 4) {
582 int Idx = Mask[j];
583
584 // Convert from vector index to 4-element subvector index
585 // If an index refers to an element outside of the subvector then give up
586 if (Idx != -1) {
587 Idx -= 4 * (j / 4);
588 if (Idx < 0 || Idx >= 4)
589 return SDValue();
590 }
591
592 // If the mask has an undef, replace it with the current index.
593 // Note that it might still be undef if the current index is also undef
594 if (SubMask[i] == -1)
595 SubMask[i] = Idx;
596 // Check that non-undef values are the same as in the mask. If they
597 // aren't then give up
598 else if (Idx != -1 && Idx != SubMask[i])
599 return SDValue();
600 }
601 }
602
603 // Calculate the immediate. Replace any remaining undefs with zero
604 APInt Imm(64, 0);
605 for (int i = 3; i >= 0; --i) {
606 int Idx = SubMask[i];
607
608 if (Idx == -1)
609 Idx = 0;
610
611 Imm <<= 2;
612 Imm |= Idx & 0x3;
613 }
614
615 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
616 DAG.getConstant(Imm, DL, MVT::i64));
617}
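// Editor's illustration (not part of the upstream source): the immediate is
// assembled two bits per element with SubMask[0] in the lowest bits. For a
// v4i32 mask <1, 0, 3, 2> the loop above produces
//   Imm = 1 | (0 << 2) | (3 << 4) | (2 << 6) = 177 (0b10110001),
// consistent with the 27 = 3 + (2 << 2) + (1 << 4) + (0 << 6) example in the
// comment before the function.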
618
619/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
620///
621/// VPACKEV interleaves the even elements from each vector.
622///
623/// It is possible to lower into VPACKEV when the mask consists of two of the
624/// following forms interleaved:
625/// <0, 2, 4, ...>
626/// <n, n+2, n+4, ...>
627/// where n is the number of elements in the vector.
628/// For example:
629/// <0, 0, 2, 2, 4, 4, ...>
630/// <0, n, 2, n+2, 4, n+4, ...>
631///
632/// When undef's appear in the mask they are treated as if they were whatever
633/// value is necessary in order to fit the above forms.
634static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
635 MVT VT, SDValue V1, SDValue V2,
636 SelectionDAG &DAG) {
637
638 const auto &Begin = Mask.begin();
639 const auto &End = Mask.end();
640 SDValue OriV1 = V1, OriV2 = V2;
641
642 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
643 V1 = OriV1;
644 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
645 V1 = OriV2;
646 else
647 return SDValue();
648
649 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
650 V2 = OriV1;
651 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
652 V2 = OriV2;
653 else
654 return SDValue();
655
656 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
657}
658
659/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
660///
661/// VPACKOD interleaves the odd elements from each vector.
662///
663/// It is possible to lower into VPACKOD when the mask consists of two of the
664/// following forms interleaved:
665/// <1, 3, 5, ...>
666/// <n+1, n+3, n+5, ...>
667/// where n is the number of elements in the vector.
668/// For example:
669/// <1, 1, 3, 3, 5, 5, ...>
670/// <1, n+1, 3, n+3, 5, n+5, ...>
671///
672/// When undef's appear in the mask they are treated as if they were whatever
673/// value is necessary in order to fit the above forms.
674static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
675 MVT VT, SDValue V1, SDValue V2,
676 SelectionDAG &DAG) {
677
678 const auto &Begin = Mask.begin();
679 const auto &End = Mask.end();
680 SDValue OriV1 = V1, OriV2 = V2;
681
682 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
683 V1 = OriV1;
684 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
685 V1 = OriV2;
686 else
687 return SDValue();
688
689 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
690 V2 = OriV1;
691 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
692 V2 = OriV2;
693 else
694 return SDValue();
695
696 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
697}
698
699/// Lower VECTOR_SHUFFLE into VILVH (if possible).
700///
701/// VILVH interleaves consecutive elements from the left (highest-indexed) half
702/// of each vector.
703///
704/// It is possible to lower into VILVH when the mask consists of two of the
705/// following forms interleaved:
706/// <x, x+1, x+2, ...>
707/// <n+x, n+x+1, n+x+2, ...>
708/// where n is the number of elements in the vector and x is half n.
709/// For example:
710/// <x, x, x+1, x+1, x+2, x+2, ...>
711/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
712///
713/// When undef's appear in the mask they are treated as if they were whatever
714/// value is necessary in order to fit the above forms.
715static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
716 MVT VT, SDValue V1, SDValue V2,
717 SelectionDAG &DAG) {
718
719 const auto &Begin = Mask.begin();
720 const auto &End = Mask.end();
721 unsigned HalfSize = Mask.size() / 2;
722 SDValue OriV1 = V1, OriV2 = V2;
723
724 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
725 V1 = OriV1;
726 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
727 V1 = OriV2;
728 else
729 return SDValue();
730
731 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
732 V2 = OriV1;
733 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
734 1))
735 V2 = OriV2;
736 else
737 return SDValue();
738
739 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
740}
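// Editor's illustration (not part of the upstream source): with n = 8 and
// x = n/2 = 4 as in the comment above, the v8i16 mask
// <4, 12, 5, 13, 6, 14, 7, 15> interleaves the high halves of both inputs:
// the even positions match <4, 5, 6, 7> (elements of V1) and the odd
// positions match <12, 13, 14, 15> (Mask.size() + HalfSize onwards, i.e.
// elements of V2), so the node produced is VILVH with operands (V2, V1).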
741
742/// Lower VECTOR_SHUFFLE into VILVL (if possible).
743///
744/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
745/// of each vector.
746///
747/// It is possible to lower into VILVL when the mask consists of two of the
748/// following forms interleaved:
749/// <0, 1, 2, ...>
750/// <n, n+1, n+2, ...>
751/// where n is the number of elements in the vector.
752/// For example:
753/// <0, 0, 1, 1, 2, 2, ...>
754/// <0, n, 1, n+1, 2, n+2, ...>
755///
756/// When undef's appear in the mask they are treated as if they were whatever
757/// value is necessary in order to fit the above forms.
758static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
759 MVT VT, SDValue V1, SDValue V2,
760 SelectionDAG &DAG) {
761
762 const auto &Begin = Mask.begin();
763 const auto &End = Mask.end();
764 SDValue OriV1 = V1, OriV2 = V2;
765
766 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
767 V1 = OriV1;
768 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
769 V1 = OriV2;
770 else
771 return SDValue();
772
773 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
774 V2 = OriV1;
775 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
776 V2 = OriV2;
777 else
778 return SDValue();
779
780 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
781}
782
783/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
784///
785/// VPICKEV copies the even elements of each vector into the result vector.
786///
787/// It is possible to lower into VPICKEV when the mask consists of two of the
788/// following forms concatenated:
789/// <0, 2, 4, ...>
790/// <n, n+2, n+4, ...>
791/// where n is the number of elements in the vector.
792/// For example:
793/// <0, 2, 4, ..., 0, 2, 4, ...>
794/// <0, 2, 4, ..., n, n+2, n+4, ...>
795///
796/// When undef's appear in the mask they are treated as if they were whatever
797/// value is necessary in order to fit the above forms.
798static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
799 MVT VT, SDValue V1, SDValue V2,
800 SelectionDAG &DAG) {
801
802 const auto &Begin = Mask.begin();
803 const auto &Mid = Mask.begin() + Mask.size() / 2;
804 const auto &End = Mask.end();
805 SDValue OriV1 = V1, OriV2 = V2;
806
807 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
808 V1 = OriV1;
809 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
810 V1 = OriV2;
811 else
812 return SDValue();
813
814 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
815 V2 = OriV1;
816 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
817 V2 = OriV2;
818
819 else
820 return SDValue();
821
822 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
823}
824
825/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
826///
827/// VPICKOD copies the odd elements of each vector into the result vector.
828///
829/// It is possible to lower into VPICKOD when the mask consists of two of the
830/// following forms concatenated:
831/// <1, 3, 5, ...>
832/// <n+1, n+3, n+5, ...>
833/// where n is the number of elements in the vector.
834/// For example:
835/// <1, 3, 5, ..., 1, 3, 5, ...>
836/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
837///
838/// When undef's appear in the mask they are treated as if they were whatever
839/// value is necessary in order to fit the above forms.
840static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
841 MVT VT, SDValue V1, SDValue V2,
842 SelectionDAG &DAG) {
843
844 const auto &Begin = Mask.begin();
845 const auto &Mid = Mask.begin() + Mask.size() / 2;
846 const auto &End = Mask.end();
847 SDValue OriV1 = V1, OriV2 = V2;
848
849 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
850 V1 = OriV1;
851 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
852 V1 = OriV2;
853 else
854 return SDValue();
855
856 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
857 V2 = OriV1;
858 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
859 V2 = OriV2;
860 else
861 return SDValue();
862
863 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
864}
865
866/// Lower VECTOR_SHUFFLE into VSHUF.
867///
868/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
869/// adding it as an operand to the resulting VSHUF.
870static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
871 MVT VT, SDValue V1, SDValue V2,
872 SelectionDAG &DAG) {
873
875 for (auto M : Mask)
876 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
877
878 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
879 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
880
881 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
882 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
883 // VSHUF concatenates the vectors in a bitwise fashion:
884 // <0b00, 0b01> + <0b10, 0b11> ->
885 // 0b0100 + 0b1110 -> 0b01001110
886 // <0b10, 0b11, 0b00, 0b01>
887 // We must therefore swap the operands to get the correct result.
888 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
889}
890
891/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
892///
893/// This routine breaks down the specific type of 128-bit shuffle and
894/// dispatches to the lowering routines accordingly.
895static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
896 SDValue V1, SDValue V2, SelectionDAG &DAG) {
897 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
898 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
899 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
900 "Vector type is unsupported for lsx!");
901 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
902 "Two operands have different types!");
903 assert(VT.getVectorNumElements() == Mask.size() &&
904 "Unexpected mask size for shuffle!");
905 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
906
907 SDValue Result;
908 // TODO: Add more comparison patterns.
909 if (V2.isUndef()) {
910 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
911 return Result;
912 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
913 return Result;
914
915 // TODO: This commented-out code may be enabled in the future to better
916 // match the pattern for instruction selection.
917 /* V2 = V1; */
918 }
919
920 // The pattern comparison order below was chosen for performance; it is
921 // recommended not to change it.
922 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
923 return Result;
924 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
925 return Result;
926 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
927 return Result;
928 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
929 return Result;
930 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
931 return Result;
932 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
933 return Result;
934 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
935 return Result;
936
937 return SDValue();
938}
939
940/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
941///
942/// It is an XVREPLVEI when the mask is:
943/// <x, x, x, ..., x+n, x+n, x+n, ...>
944/// where x appears n times and n is half the vector length.
945///
946/// When undef's appear in the mask they are treated as if they were whatever
947/// value is necessary in order to fit the above form.
948static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
949 ArrayRef<int> Mask, MVT VT,
950 SDValue V1, SDValue V2,
951 SelectionDAG &DAG) {
952 int SplatIndex = -1;
953 for (const auto &M : Mask) {
954 if (M != -1) {
955 SplatIndex = M;
956 break;
957 }
958 }
959
960 if (SplatIndex == -1)
961 return DAG.getUNDEF(VT);
962
963 const auto &Begin = Mask.begin();
964 const auto &End = Mask.end();
965 unsigned HalfSize = Mask.size() / 2;
966
967 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
968 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
969 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
970 0)) {
971 APInt Imm(64, SplatIndex);
972 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
973 DAG.getConstant(Imm, DL, MVT::i64));
974 }
975
976 return SDValue();
977}
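// Editor's illustration (not part of the upstream source): for a v8i32
// single-input shuffle, the mask <1, 1, 1, 1, 5, 5, 5, 5> passes both
// half-checks above (SplatIndex = 1 in the low half, SplatIndex + HalfSize
// = 5 in the high half), so it lowers to VREPLVEI with immediate 1, i.e.
// element 1 of each 128-bit lane (vector elements 1 and 5) is broadcast
// within its own lane.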
978
979/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
980static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
981 MVT VT, SDValue V1, SDValue V2,
982 SelectionDAG &DAG) {
983 // When the size is less than or equal to 4, lower-cost instructions may
984 // be used.
985 if (Mask.size() <= 4)
986 return SDValue();
987 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
988}
989
990/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
991static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
992 MVT VT, SDValue V1, SDValue V2,
993 SelectionDAG &DAG) {
994 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
995}
996
997/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
998static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
999 MVT VT, SDValue V1, SDValue V2,
1000 SelectionDAG &DAG) {
1001 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
1002}
1003
1004/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
1005static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
1006 MVT VT, SDValue V1, SDValue V2,
1007 SelectionDAG &DAG) {
1008
1009 const auto &Begin = Mask.begin();
1010 const auto &End = Mask.end();
1011 unsigned HalfSize = Mask.size() / 2;
1012 unsigned LeftSize = HalfSize / 2;
1013 SDValue OriV1 = V1, OriV2 = V2;
1014
1015 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
1016 1) &&
1017 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
1018 V1 = OriV1;
1019 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
1020 Mask.size() + HalfSize - LeftSize, 1) &&
1021 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1022 Mask.size() + HalfSize + LeftSize, 1))
1023 V1 = OriV2;
1024 else
1025 return SDValue();
1026
1027 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
1028 1) &&
1029 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
1030 1))
1031 V2 = OriV1;
1032 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
1033 Mask.size() + HalfSize - LeftSize, 1) &&
1034 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1035 Mask.size() + HalfSize + LeftSize, 1))
1036 V2 = OriV2;
1037 else
1038 return SDValue();
1039
1040 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1041}
1042
1043/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
1044static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
1045 MVT VT, SDValue V1, SDValue V2,
1046 SelectionDAG &DAG) {
1047
1048 const auto &Begin = Mask.begin();
1049 const auto &End = Mask.end();
1050 unsigned HalfSize = Mask.size() / 2;
1051 SDValue OriV1 = V1, OriV2 = V2;
1052
1053 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
1054 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
1055 V1 = OriV1;
1056 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
1057 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1058 Mask.size() + HalfSize, 1))
1059 V1 = OriV2;
1060 else
1061 return SDValue();
1062
1063 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
1064 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1065 V2 = OriV1;
1066 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1067 1) &&
1068 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1069 Mask.size() + HalfSize, 1))
1070 V2 = OriV2;
1071 else
1072 return SDValue();
1073
1074 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1075}
1076
1077/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1078static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1079 MVT VT, SDValue V1, SDValue V2,
1080 SelectionDAG &DAG) {
1081
1082 const auto &Begin = Mask.begin();
1083 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1084 const auto &Mid = Mask.begin() + Mask.size() / 2;
1085 const auto &RightMid = Mask.end() - Mask.size() / 4;
1086 const auto &End = Mask.end();
1087 unsigned HalfSize = Mask.size() / 2;
1088 SDValue OriV1 = V1, OriV2 = V2;
1089
1090 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1091 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1092 V1 = OriV1;
1093 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1094 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1095 V1 = OriV2;
1096 else
1097 return SDValue();
1098
1099 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1100 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1101 V2 = OriV1;
1102 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1103 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1104 V2 = OriV2;
1105
1106 else
1107 return SDValue();
1108
1109 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1110}
1111
1112/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1113static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1114 MVT VT, SDValue V1, SDValue V2,
1115 SelectionDAG &DAG) {
1116
1117 const auto &Begin = Mask.begin();
1118 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1119 const auto &Mid = Mask.begin() + Mask.size() / 2;
1120 const auto &RightMid = Mask.end() - Mask.size() / 4;
1121 const auto &End = Mask.end();
1122 unsigned HalfSize = Mask.size() / 2;
1123 SDValue OriV1 = V1, OriV2 = V2;
1124
1125 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1126 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1127 V1 = OriV1;
1128 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1129 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1130 2))
1131 V1 = OriV2;
1132 else
1133 return SDValue();
1134
1135 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1136 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1137 V2 = OriV1;
1138 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1139 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1140 2))
1141 V2 = OriV2;
1142 else
1143 return SDValue();
1144
1145 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1146}
1147
1148/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1149static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1150 MVT VT, SDValue V1, SDValue V2,
1151 SelectionDAG &DAG) {
1152
1153 int MaskSize = Mask.size();
1154 int HalfSize = Mask.size() / 2;
1155 const auto &Begin = Mask.begin();
1156 const auto &Mid = Mask.begin() + HalfSize;
1157 const auto &End = Mask.end();
1158
1159 // VECTOR_SHUFFLE concatenates the vectors:
1160 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
1161 // shuffling ->
1162 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
1163 //
1164 // XVSHUF concatenates the vectors:
1165 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
1166 // shuffling ->
1167 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
1168 SmallVector<SDValue, 8> MaskAlloc;
1169 for (auto it = Begin; it < Mid; it++) {
1170 if (*it < 0) // UNDEF
1171 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1172 else if ((*it >= 0 && *it < HalfSize) ||
1173 (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
1174 int M = *it < HalfSize ? *it : *it - HalfSize;
1175 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1176 } else
1177 return SDValue();
1178 }
1179 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
1180
1181 for (auto it = Mid; it < End; it++) {
1182 if (*it < 0) // UNDEF
1183 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1184 else if ((*it >= HalfSize && *it < MaskSize) ||
1185 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
1186 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
1187 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1188 } else
1189 return SDValue();
1190 }
1191 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
1192
1193 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1194 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
1195 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1196}
1197
1198/// Shuffle vectors by lane to generate more optimized instructions.
1199/// 256-bit shuffles are always treated as 2-lane 128-bit shuffles.
1200///
1201/// Therefore, all cases other than the following four are treated as
1202/// cross-lane shuffles, for which optimization is relatively limited.
1203///
1204/// - Shuffle of the high, low lanes of the two input vectors
1205/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
1206/// - Shuffle of the low, high lanes of the two input vectors
1207/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
1208/// - Shuffle of the low, low lanes of the two input vectors
1209/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
1210/// - Shuffle of the high, high lanes of the two input vectors
1211/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1212///
1213/// The first case is the closest to LoongArch instructions and the other
1214/// cases need to be converted to it for processing.
1215///
1216/// This function may modify V1, V2 and Mask
1217static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
1218 MutableArrayRef<int> Mask, MVT VT,
1219 SDValue &V1, SDValue &V2,
1220 SelectionDAG &DAG) {
1221
1222 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
1223
1224 int MaskSize = Mask.size();
1225 int HalfSize = Mask.size() / 2;
1226
1227 HalfMaskType preMask = None, postMask = None;
1228
1229 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1230 return M < 0 || (M >= 0 && M < HalfSize) ||
1231 (M >= MaskSize && M < MaskSize + HalfSize);
1232 }))
1233 preMask = HighLaneTy;
1234 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1235 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1236 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1237 }))
1238 preMask = LowLaneTy;
1239
1240 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1241 return M < 0 || (M >= 0 && M < HalfSize) ||
1242 (M >= MaskSize && M < MaskSize + HalfSize);
1243 }))
1244 postMask = HighLaneTy;
1245 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1246 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1247 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1248 }))
1249 postMask = LowLaneTy;
1250
1251 // The first half of the mask being high-lane type and the second half
1252 // being low-lane type is the form closest to the LoongArch instructions.
1253 //
1254 // Note: In the LoongArch architecture, the high lane of the mask
1255 // corresponds to the lower 128 bits of the vector register, and the low
1256 // lane of the mask corresponds to the higher 128 bits.
1257 if (preMask == HighLaneTy && postMask == LowLaneTy) {
1258 return;
1259 }
1260 if (preMask == LowLaneTy && postMask == HighLaneTy) {
1261 V1 = DAG.getBitcast(MVT::v4i64, V1);
1262 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1263 DAG.getConstant(0b01001110, DL, MVT::i64));
1264 V1 = DAG.getBitcast(VT, V1);
1265
1266 if (!V2.isUndef()) {
1267 V2 = DAG.getBitcast(MVT::v4i64, V2);
1268 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1269 DAG.getConstant(0b01001110, DL, MVT::i64));
1270 V2 = DAG.getBitcast(VT, V2);
1271 }
1272
1273 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1274 *it = *it < 0 ? *it : *it - HalfSize;
1275 }
1276 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1277 *it = *it < 0 ? *it : *it + HalfSize;
1278 }
1279 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
1280 V1 = DAG.getBitcast(MVT::v4i64, V1);
1281 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1282 DAG.getConstant(0b11101110, DL, MVT::i64));
1283 V1 = DAG.getBitcast(VT, V1);
1284
1285 if (!V2.isUndef()) {
1286 V2 = DAG.getBitcast(MVT::v4i64, V2);
1287 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1288 DAG.getConstant(0b11101110, DL, MVT::i64));
1289 V2 = DAG.getBitcast(VT, V2);
1290 }
1291
1292 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1293 *it = *it < 0 ? *it : *it - HalfSize;
1294 }
1295 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
1296 V1 = DAG.getBitcast(MVT::v4i64, V1);
1297 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1298 DAG.getConstant(0b01000100, DL, MVT::i64));
1299 V1 = DAG.getBitcast(VT, V1);
1300
1301 if (!V2.isUndef()) {
1302 V2 = DAG.getBitcast(MVT::v4i64, V2);
1303 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1304 DAG.getConstant(0b01000100, DL, MVT::i64));
1305 V2 = DAG.getBitcast(VT, V2);
1306 }
1307
1308 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1309 *it = *it < 0 ? *it : *it + HalfSize;
1310 }
1311 } else { // cross-lane
1312 return;
1313 }
1314}
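// Editor's illustration (not part of the upstream source): for a v8i32
// single-input shuffle with mask <4, 5, 6, 7, 0, 1, 2, 3>, the first half is
// LowLaneTy and the second half is HighLaneTy, so V1 is bitcast to v4i64,
// its two 128-bit halves are swapped with XVPERMI (immediate 0b01001110),
// and the mask is rewritten to <0, 1, 2, 3, 4, 5, 6, 7>, which no longer
// crosses lanes and can be matched by the cheaper patterns tried in
// lower256BitShuffle.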
1315
1316/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1317///
1318/// This routine breaks down the specific type of 256-bit shuffle and
1319/// dispatches to the lowering routines accordingly.
1320static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1321 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1322 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1323 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1324 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1325 "Vector type is unsupported for lasx!");
1326 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1327 "Two operands have different types!");
1328 assert(VT.getVectorNumElements() == Mask.size() &&
1329 "Unexpected mask size for shuffle!");
1330 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1331 assert(Mask.size() >= 4 && "Mask size is less than 4.");
1332
1333 // Canonicalize non-cross-lane shuffle vectors.
1334 SmallVector<int> NewMask(Mask);
1335 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
1336
1337 SDValue Result;
1338 // TODO: Add more comparison patterns.
1339 if (V2.isUndef()) {
1340 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
1341 return Result;
1342 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1343 return Result;
1344
1345 // TODO: This commented-out code may be enabled in the future to better
1346 // match the pattern for instruction selection.
1347 /* V2 = V1; */
1348 }
1349
1350 // The pattern comparison order below was chosen for performance; it is
1351 // recommended not to change it.
1352 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
1353 return Result;
1354 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
1355 return Result;
1356 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
1357 return Result;
1358 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
1359 return Result;
1360 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
1361 return Result;
1362 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
1363 return Result;
1364 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
1365 return Result;
1366
1367 return SDValue();
1368}
1369
1370SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1371 SelectionDAG &DAG) const {
1372 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
1373 ArrayRef<int> OrigMask = SVOp->getMask();
1374 SDValue V1 = Op.getOperand(0);
1375 SDValue V2 = Op.getOperand(1);
1376 MVT VT = Op.getSimpleValueType();
1377 int NumElements = VT.getVectorNumElements();
1378 SDLoc DL(Op);
1379
1380 bool V1IsUndef = V1.isUndef();
1381 bool V2IsUndef = V2.isUndef();
1382 if (V1IsUndef && V2IsUndef)
1383 return DAG.getUNDEF(VT);
1384
1385 // When we create a shuffle node we put the UNDEF node as the second
1386 // operand, but in some cases the first operand may be transformed to UNDEF.
1387 // In that case we should just commute the node.
1388 if (V1IsUndef)
1389 return DAG.getCommutedVectorShuffle(*SVOp);
1390
1391 // Check for non-undef masks pointing at an undef vector and make the masks
1392 // undef as well. This makes it easier to match the shuffle based solely on
1393 // the mask.
1394 if (V2IsUndef &&
1395 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
1396 SmallVector<int, 8> NewMask(OrigMask);
1397 for (int &M : NewMask)
1398 if (M >= NumElements)
1399 M = -1;
1400 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
1401 }
1402
1403 // Check for illegal shuffle mask element index values.
1404 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1405 (void)MaskUpperLimit;
1406 assert(llvm::all_of(OrigMask,
1407 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1408 "Out of bounds shuffle index");
1409
1410 // For each vector width, delegate to a specialized lowering routine.
1411 if (VT.is128BitVector())
1412 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1413
1414 if (VT.is256BitVector())
1415 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1416
1417 return SDValue();
1418}
1419
1420static bool isConstantOrUndef(const SDValue Op) {
1421 if (Op->isUndef())
1422 return true;
1423 if (isa<ConstantSDNode>(Op))
1424 return true;
1425 if (isa<ConstantFPSDNode>(Op))
1426 return true;
1427 return false;
1428}
1429
1430static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
1431 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1432 if (isConstantOrUndef(Op->getOperand(i)))
1433 return true;
1434 return false;
1435}
1436
1437SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1438 SelectionDAG &DAG) const {
1439 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
1440 EVT ResTy = Op->getValueType(0);
1441 SDLoc DL(Op);
1442 APInt SplatValue, SplatUndef;
1443 unsigned SplatBitSize;
1444 bool HasAnyUndefs;
1445 bool Is128Vec = ResTy.is128BitVector();
1446 bool Is256Vec = ResTy.is256BitVector();
1447
1448 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1449 (!Subtarget.hasExtLASX() || !Is256Vec))
1450 return SDValue();
1451
1452 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1453 /*MinSplatBits=*/8) &&
1454 SplatBitSize <= 64) {
1455 // We can only cope with 8, 16, 32, or 64-bit elements.
1456 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1457 SplatBitSize != 64)
1458 return SDValue();
1459
1460 EVT ViaVecTy;
1461
1462 switch (SplatBitSize) {
1463 default:
1464 return SDValue();
1465 case 8:
1466 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1467 break;
1468 case 16:
1469 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1470 break;
1471 case 32:
1472 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1473 break;
1474 case 64:
1475 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1476 break;
1477 }
1478
1479 // SelectionDAG::getConstant will promote SplatValue appropriately.
1480 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
1481
1482 // Bitcast to the type we originally wanted.
1483 if (ViaVecTy != ResTy)
1484 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
1485
1486 return Result;
1487 }
1488
1489 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
1490 return Op;
1491
1493 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
1494 // The resulting code is the same length as the expansion, but it doesn't
1495 // use memory operations.
1496 EVT ResTy = Node->getValueType(0);
1497
1498 assert(ResTy.isVector());
1499
1500 unsigned NumElts = ResTy.getVectorNumElements();
1501 SDValue Vector = DAG.getUNDEF(ResTy);
1502 for (unsigned i = 0; i < NumElts; ++i) {
1503 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
1504 Node->getOperand(i),
1505 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1506 }
1507 return Vector;
1508 }
1509
1510 return SDValue();
1511}
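// Editor's illustration (not part of the upstream source): a v4i32
// BUILD_VECTOR whose operands are all the constant 7 is recognised by
// isConstantSplat with SplatBitSize = 32 and becomes
// DAG.getConstant(7, DL, MVT::v4i32), which the rest of selection can treat
// as a single splat constant; a BUILD_VECTOR with no constant or undef
// operands is instead assembled with INSERT_VECTOR_ELT operations, as the
// comment above notes.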
1512
1513SDValue
1514LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1515 SelectionDAG &DAG) const {
1516 EVT VecTy = Op->getOperand(0)->getValueType(0);
1517 SDValue Idx = Op->getOperand(1);
1518 EVT EltTy = VecTy.getVectorElementType();
1519 unsigned NumElts = VecTy.getVectorNumElements();
1520
1521 if (isa<ConstantSDNode>(Idx) &&
1522 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1523 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1524 return Op;
1525
1526 return SDValue();
1527}
1528
1529SDValue
1530LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1531 SelectionDAG &DAG) const {
1532 if (isa<ConstantSDNode>(Op->getOperand(2)))
1533 return Op;
1534 return SDValue();
1535}
1536
1537SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1538 SelectionDAG &DAG) const {
1539 SDLoc DL(Op);
1540 SyncScope::ID FenceSSID =
1541 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1542
1543 // singlethread fences only synchronize with signal handlers on the same
1544 // thread and thus only need to preserve instruction order, not actually
1545 // enforce memory ordering.
1546 if (FenceSSID == SyncScope::SingleThread)
1547 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1548 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1549
1550 return Op;
1551}
1552
1553SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1554 SelectionDAG &DAG) const {
1555
1556 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
1557 DAG.getContext()->emitError(
1558 "On LA64, only 64-bit registers can be written.");
1559 return Op.getOperand(0);
1560 }
1561
1562 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
1563 DAG.getContext()->emitError(
1564 "On LA32, only 32-bit registers can be written.");
1565 return Op.getOperand(0);
1566 }
1567
1568 return Op;
1569}
1570
1571SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1572 SelectionDAG &DAG) const {
1573 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
1574 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
1575 "be a constant integer");
1576 return SDValue();
1577 }
1578
1581 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1582 EVT VT = Op.getValueType();
1583 SDLoc DL(Op);
1584 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1585 unsigned Depth = Op.getConstantOperandVal(0);
1586 int GRLenInBytes = Subtarget.getGRLen() / 8;
1587
1588 while (Depth--) {
1589 int Offset = -(GRLenInBytes * 2);
1590 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1591 DAG.getSignedConstant(Offset, DL, VT));
1592 FrameAddr =
1593 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1594 }
1595 return FrameAddr;
1596}
1597
1598SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1599 SelectionDAG &DAG) const {
1601 return SDValue();
1602
1603 // Currently we only support the return address for the current frame.
1604 if (Op.getConstantOperandVal(0) != 0) {
1605 DAG.getContext()->emitError(
1606 "return address can only be determined for the current frame");
1607 return SDValue();
1608 }
1609
1612 MVT GRLenVT = Subtarget.getGRLenVT();
1613
1614 // Return the value of the return address register, marking it an implicit
1615 // live-in.
1616 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
1617 getRegClassFor(GRLenVT));
1618 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
1619}
1620
1621SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1622 SelectionDAG &DAG) const {
1624 auto Size = Subtarget.getGRLen() / 8;
1625 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
1626 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1627}
1628
1629SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1630 SelectionDAG &DAG) const {
1632 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1633
1634 SDLoc DL(Op);
1635 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1636 getPointerTy(DAG.getDataLayout()));
1637
1638 // vastart just stores the address of the VarArgsFrameIndex slot into the
1639 // memory location argument.
1640 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1641 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1642 MachinePointerInfo(SV));
1643}
1644
1645SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1646 SelectionDAG &DAG) const {
1647 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1648 !Subtarget.hasBasicD() && "unexpected target features");
1649
1650 SDLoc DL(Op);
1651 SDValue Op0 = Op.getOperand(0);
1652 if (Op0->getOpcode() == ISD::AND) {
1653 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
1654 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1655 return Op;
1656 }
1657
1658 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1659 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
1660 Op0.getConstantOperandVal(2) == UINT64_C(0))
1661 return Op;
1662
1663 if (Op0.getOpcode() == ISD::AssertZext &&
1664 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
1665 return Op;
1666
1667 EVT OpVT = Op0.getValueType();
1668 EVT RetVT = Op.getValueType();
1669 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1670 MakeLibCallOptions CallOptions;
1671 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1672 SDValue Chain = SDValue();
1673 SDValue Result;
1674 std::tie(Result, Chain) =
1675 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1676 return Result;
1677}
1678
1679SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1680 SelectionDAG &DAG) const {
1681 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1682 !Subtarget.hasBasicD() && "unexpected target features");
1683
1684 SDLoc DL(Op);
1685 SDValue Op0 = Op.getOperand(0);
1686
1687 if ((Op0.getOpcode() == ISD::AssertSext ||
1688 Op0.getOpcode() == ISD::AssertZext) &&
1689 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
1690 return Op;
1691
1692 EVT OpVT = Op0.getValueType();
1693 EVT RetVT = Op.getValueType();
1694 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1695 MakeLibCallOptions CallOptions;
1696 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1697 SDValue Chain = SDValue();
1698 SDValue Result;
1699 std::tie(Result, Chain) =
1700 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1701 return Result;
1702}
1703
1704SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1705 SelectionDAG &DAG) const {
1706
1707 SDLoc DL(Op);
1708 SDValue Op0 = Op.getOperand(0);
1709
1710 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1711 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
1712 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1713 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
1714 }
1715 return Op;
1716}
1717
1718SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1719 SelectionDAG &DAG) const {
1720
1721 SDLoc DL(Op);
1722 SDValue Op0 = Op.getOperand(0);
1723
1724 if (Op0.getValueType() == MVT::f16)
1725 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
1726
1727 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1728 !Subtarget.hasBasicD()) {
1729 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
1730 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
1731 }
1732
1733 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
1734 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
1735 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
1736}
1737
1738static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
1739 SelectionDAG &DAG, unsigned Flags) {
1740 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1741}
1742
1743static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
1744 SelectionDAG &DAG, unsigned Flags) {
1745 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1746 Flags);
1747}
1748
1749static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
1750 SelectionDAG &DAG, unsigned Flags) {
1751 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1752 N->getOffset(), Flags);
1753}
1754
1755static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
1756 SelectionDAG &DAG, unsigned Flags) {
1757 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1758}
1759
1760template <class NodeTy>
1761SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1762 CodeModel::Model M,
1763 bool IsLocal) const {
1764 SDLoc DL(N);
1765 EVT Ty = getPointerTy(DAG.getDataLayout());
1766 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1767 SDValue Load;
1768
1769 switch (M) {
1770 default:
1771 report_fatal_error("Unsupported code model");
1772
1773 case CodeModel::Large: {
1774 assert(Subtarget.is64Bit() && "Large code model requires LA64");
1775
1776 // This is not actually used, but is necessary for successfully matching
1777 // the PseudoLA_*_LARGE nodes.
1778 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1779 if (IsLocal) {
1780 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
1781 // eventually becomes the desired 5-insn code sequence.
1782 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
1783 Tmp, Addr),
1784 0);
1785 } else {
1786 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
1787 // eventually becomes the desired 5-insn code sequence.
1788 Load = SDValue(
1789 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
1790 0);
1791 }
1792 break;
1793 }
1794
1795 case CodeModel::Small:
1796 case CodeModel::Medium:
1797 if (IsLocal) {
1798 // This generates the pattern (PseudoLA_PCREL sym), which expands to
1799 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1800 Load = SDValue(
1801 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
1802 } else {
1803 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1804 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1805 Load =
1806 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
1807 }
1808 }
1809
1810 if (!IsLocal) {
1811 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1817 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1818 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
1819 }
1820
1821 return Load;
1822}
1823
1824SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1825 SelectionDAG &DAG) const {
1826 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
1827 DAG.getTarget().getCodeModel());
1828}
1829
1830SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1831 SelectionDAG &DAG) const {
1832 return getAddr(cast<JumpTableSDNode>(Op), DAG,
1833 DAG.getTarget().getCodeModel());
1834}
1835
1836SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1837 SelectionDAG &DAG) const {
1838 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
1839 DAG.getTarget().getCodeModel());
1840}
1841
1842SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1843 SelectionDAG &DAG) const {
1844 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1845 assert(N->getOffset() == 0 && "unexpected offset in global node");
1846 auto CM = DAG.getTarget().getCodeModel();
1847 const GlobalValue *GV = N->getGlobal();
1848
1849 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
1850 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
1851 CM = *GCM;
1852 }
1853
1854 return getAddr(N, DAG, CM, GV->isDSOLocal());
1855}
1856
1857SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1858 SelectionDAG &DAG,
1859 unsigned Opc, bool UseGOT,
1860 bool Large) const {
1861 SDLoc DL(N);
1862 EVT Ty = getPointerTy(DAG.getDataLayout());
1863 MVT GRLenVT = Subtarget.getGRLenVT();
1864
1865 // This is not actually used, but is necessary for successfully matching the
1866 // PseudoLA_*_LARGE nodes.
1867 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1868 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1869 SDValue Offset = Large
1870 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1871 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1872 if (UseGOT) {
1873 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1879 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1880 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
1881 }
1882
1883 // Add the thread pointer.
1884 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
1885 DAG.getRegister(LoongArch::R2, GRLenVT));
1886}
1887
1888SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1889 SelectionDAG &DAG,
1890 unsigned Opc,
1891 bool Large) const {
1892 SDLoc DL(N);
1893 EVT Ty = getPointerTy(DAG.getDataLayout());
1894 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1895
1896 // This is not actually used, but is necessary for successfully matching the
1897 // PseudoLA_*_LARGE nodes.
1898 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1899
1900 // Use a PC-relative addressing mode to access the dynamic GOT address.
1901 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1902 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1903 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1904
1905 // Prepare argument list to generate call.
1906 ArgListTy Args;
1907 ArgListEntry Entry;
1908 Entry.Node = Load;
1909 Entry.Ty = CallTy;
1910 Args.push_back(Entry);
1911
1912 // Setup call to __tls_get_addr.
1913 TargetLowering::CallLoweringInfo CLI(DAG);
1914 CLI.setDebugLoc(DL)
1915 .setChain(DAG.getEntryNode())
1916 .setLibCallee(CallingConv::C, CallTy,
1917 DAG.getExternalSymbol("__tls_get_addr", Ty),
1918 std::move(Args));
1919
1920 return LowerCallTo(CLI).first;
1921}
1922
1923SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1924 SelectionDAG &DAG, unsigned Opc,
1925 bool Large) const {
1926 SDLoc DL(N);
1927 EVT Ty = getPointerTy(DAG.getDataLayout());
1928 const GlobalValue *GV = N->getGlobal();
1929
1930 // This is not actually used, but is necessary for successfully matching the
1931 // PseudoLA_*_LARGE nodes.
1932 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1933
1934 // Use a PC-relative addressing mode to access the global dynamic GOT address.
1935 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1936 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1937 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1938 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1939}
1940
1941SDValue
1942LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1943 SelectionDAG &DAG) const {
1944 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1945 CallingConv::GHC)
1946 report_fatal_error("In GHC calling convention TLS is not supported");
1947
1948 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1949 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1950
1951 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1952 assert(N->getOffset() == 0 && "unexpected offset in global node");
1953
1954 if (DAG.getTarget().useEmulatedTLS())
1955 report_fatal_error("the emulated TLS is prohibited",
1956 /*GenCrashDiag=*/false);
1957
1958 bool IsDesc = DAG.getTarget().useTLSDESC();
1959
1960 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1961 case TLSModel::GeneralDynamic:
1962 // In this model, application code calls the dynamic linker function
1963 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1964 // runtime.
1965 if (!IsDesc)
1966 return getDynamicTLSAddr(N, DAG,
1967 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
1968 : LoongArch::PseudoLA_TLS_GD,
1969 Large);
1970 break;
1971 case TLSModel::LocalDynamic:
1972 // Same as GeneralDynamic, except for assembly modifiers and relocation
1973 // records.
1974 if (!IsDesc)
1975 return getDynamicTLSAddr(N, DAG,
1976 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
1977 : LoongArch::PseudoLA_TLS_LD,
1978 Large);
1979 break;
1980 case TLSModel::InitialExec:
1981 // This model uses the GOT to resolve TLS offsets.
1982 return getStaticTLSAddr(N, DAG,
1983 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
1984 : LoongArch::PseudoLA_TLS_IE,
1985 /*UseGOT=*/true, Large);
1986 case TLSModel::LocalExec:
1987 // This model is used when static linking as the TLS offsets are resolved
1988 // during program linking.
1989 //
1990 // This node doesn't need an extra argument for the large code model.
1991 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
1992 /*UseGOT=*/false);
1993 }
1994
1995 return getTLSDescAddr(N, DAG,
1996 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
1997 : LoongArch::PseudoLA_TLS_DESC,
1998 Large);
1999}
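// In short: GeneralDynamic/LocalDynamic lower to PseudoLA_TLS_GD/LD plus a
// __tls_get_addr call unless TLS descriptors are enabled, InitialExec loads
// the offset from the GOT via PseudoLA_TLS_IE, LocalExec uses the link-time
// offset via PseudoLA_TLS_LE, and the remaining descriptor cases reach the
// PseudoLA_TLS_DESC path after the switch.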
2000
2001template <unsigned N>
2002 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
2003 SelectionDAG &DAG, bool IsSigned = false) {
2004 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
2005 // Check the ImmArg.
2006 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2007 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2008 DAG.getContext()->emitError(Op->getOperationName(0) +
2009 ": argument out of range.");
2010 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
2011 }
2012 return SDValue();
2013}
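// For example, the switch in lowerINTRINSIC_WO_CHAIN below uses
// checkIntrinsicImmArg<3>(Op, 2, DAG) for loongarch_lsx_vsat_b, so that
// intrinsic's immediate operand must lie in [0, 7]; out-of-range values emit
// the "argument out of range" diagnostic and the result is replaced by UNDEF.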
2014
2015SDValue
2016LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
2017 SelectionDAG &DAG) const {
2018 SDLoc DL(Op);
2019 switch (Op.getConstantOperandVal(0)) {
2020 default:
2021 return SDValue(); // Don't custom lower most intrinsics.
2022 case Intrinsic::thread_pointer: {
2023 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2024 return DAG.getRegister(LoongArch::R2, PtrVT);
2025 }
2026 case Intrinsic::loongarch_lsx_vpickve2gr_d:
2027 case Intrinsic::loongarch_lsx_vpickve2gr_du:
2028 case Intrinsic::loongarch_lsx_vreplvei_d:
2029 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
2030 return checkIntrinsicImmArg<1>(Op, 2, DAG);
2031 case Intrinsic::loongarch_lsx_vreplvei_w:
2032 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
2033 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
2034 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
2035 case Intrinsic::loongarch_lasx_xvpickve_d:
2036 case Intrinsic::loongarch_lasx_xvpickve_d_f:
2037 return checkIntrinsicImmArg<2>(Op, 2, DAG);
2038 case Intrinsic::loongarch_lasx_xvinsve0_d:
2039 return checkIntrinsicImmArg<2>(Op, 3, DAG);
2040 case Intrinsic::loongarch_lsx_vsat_b:
2041 case Intrinsic::loongarch_lsx_vsat_bu:
2042 case Intrinsic::loongarch_lsx_vrotri_b:
2043 case Intrinsic::loongarch_lsx_vsllwil_h_b:
2044 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
2045 case Intrinsic::loongarch_lsx_vsrlri_b:
2046 case Intrinsic::loongarch_lsx_vsrari_b:
2047 case Intrinsic::loongarch_lsx_vreplvei_h:
2048 case Intrinsic::loongarch_lasx_xvsat_b:
2049 case Intrinsic::loongarch_lasx_xvsat_bu:
2050 case Intrinsic::loongarch_lasx_xvrotri_b:
2051 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
2052 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
2053 case Intrinsic::loongarch_lasx_xvsrlri_b:
2054 case Intrinsic::loongarch_lasx_xvsrari_b:
2055 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
2056 case Intrinsic::loongarch_lasx_xvpickve_w:
2057 case Intrinsic::loongarch_lasx_xvpickve_w_f:
2058 return checkIntrinsicImmArg<3>(Op, 2, DAG);
2059 case Intrinsic::loongarch_lasx_xvinsve0_w:
2060 return checkIntrinsicImmArg<3>(Op, 3, DAG);
2061 case Intrinsic::loongarch_lsx_vsat_h:
2062 case Intrinsic::loongarch_lsx_vsat_hu:
2063 case Intrinsic::loongarch_lsx_vrotri_h:
2064 case Intrinsic::loongarch_lsx_vsllwil_w_h:
2065 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
2066 case Intrinsic::loongarch_lsx_vsrlri_h:
2067 case Intrinsic::loongarch_lsx_vsrari_h:
2068 case Intrinsic::loongarch_lsx_vreplvei_b:
2069 case Intrinsic::loongarch_lasx_xvsat_h:
2070 case Intrinsic::loongarch_lasx_xvsat_hu:
2071 case Intrinsic::loongarch_lasx_xvrotri_h:
2072 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2073 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2074 case Intrinsic::loongarch_lasx_xvsrlri_h:
2075 case Intrinsic::loongarch_lasx_xvsrari_h:
2076 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2077 return checkIntrinsicImmArg<4>(Op, 2, DAG);
2078 case Intrinsic::loongarch_lsx_vsrlni_b_h:
2079 case Intrinsic::loongarch_lsx_vsrani_b_h:
2080 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2081 case Intrinsic::loongarch_lsx_vsrarni_b_h:
2082 case Intrinsic::loongarch_lsx_vssrlni_b_h:
2083 case Intrinsic::loongarch_lsx_vssrani_b_h:
2084 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2085 case Intrinsic::loongarch_lsx_vssrani_bu_h:
2086 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2087 case Intrinsic::loongarch_lsx_vssrarni_b_h:
2088 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2089 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2090 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2091 case Intrinsic::loongarch_lasx_xvsrani_b_h:
2092 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2093 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2094 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2095 case Intrinsic::loongarch_lasx_xvssrani_b_h:
2096 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2097 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2098 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2099 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2100 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2101 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2102 return checkIntrinsicImmArg<4>(Op, 3, DAG);
2103 case Intrinsic::loongarch_lsx_vsat_w:
2104 case Intrinsic::loongarch_lsx_vsat_wu:
2105 case Intrinsic::loongarch_lsx_vrotri_w:
2106 case Intrinsic::loongarch_lsx_vsllwil_d_w:
2107 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2108 case Intrinsic::loongarch_lsx_vsrlri_w:
2109 case Intrinsic::loongarch_lsx_vsrari_w:
2110 case Intrinsic::loongarch_lsx_vslei_bu:
2111 case Intrinsic::loongarch_lsx_vslei_hu:
2112 case Intrinsic::loongarch_lsx_vslei_wu:
2113 case Intrinsic::loongarch_lsx_vslei_du:
2114 case Intrinsic::loongarch_lsx_vslti_bu:
2115 case Intrinsic::loongarch_lsx_vslti_hu:
2116 case Intrinsic::loongarch_lsx_vslti_wu:
2117 case Intrinsic::loongarch_lsx_vslti_du:
2118 case Intrinsic::loongarch_lsx_vbsll_v:
2119 case Intrinsic::loongarch_lsx_vbsrl_v:
2120 case Intrinsic::loongarch_lasx_xvsat_w:
2121 case Intrinsic::loongarch_lasx_xvsat_wu:
2122 case Intrinsic::loongarch_lasx_xvrotri_w:
2123 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2124 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2125 case Intrinsic::loongarch_lasx_xvsrlri_w:
2126 case Intrinsic::loongarch_lasx_xvsrari_w:
2127 case Intrinsic::loongarch_lasx_xvslei_bu:
2128 case Intrinsic::loongarch_lasx_xvslei_hu:
2129 case Intrinsic::loongarch_lasx_xvslei_wu:
2130 case Intrinsic::loongarch_lasx_xvslei_du:
2131 case Intrinsic::loongarch_lasx_xvslti_bu:
2132 case Intrinsic::loongarch_lasx_xvslti_hu:
2133 case Intrinsic::loongarch_lasx_xvslti_wu:
2134 case Intrinsic::loongarch_lasx_xvslti_du:
2135 case Intrinsic::loongarch_lasx_xvbsll_v:
2136 case Intrinsic::loongarch_lasx_xvbsrl_v:
2137 return checkIntrinsicImmArg<5>(Op, 2, DAG);
2138 case Intrinsic::loongarch_lsx_vseqi_b:
2139 case Intrinsic::loongarch_lsx_vseqi_h:
2140 case Intrinsic::loongarch_lsx_vseqi_w:
2141 case Intrinsic::loongarch_lsx_vseqi_d:
2142 case Intrinsic::loongarch_lsx_vslei_b:
2143 case Intrinsic::loongarch_lsx_vslei_h:
2144 case Intrinsic::loongarch_lsx_vslei_w:
2145 case Intrinsic::loongarch_lsx_vslei_d:
2146 case Intrinsic::loongarch_lsx_vslti_b:
2147 case Intrinsic::loongarch_lsx_vslti_h:
2148 case Intrinsic::loongarch_lsx_vslti_w:
2149 case Intrinsic::loongarch_lsx_vslti_d:
2150 case Intrinsic::loongarch_lasx_xvseqi_b:
2151 case Intrinsic::loongarch_lasx_xvseqi_h:
2152 case Intrinsic::loongarch_lasx_xvseqi_w:
2153 case Intrinsic::loongarch_lasx_xvseqi_d:
2154 case Intrinsic::loongarch_lasx_xvslei_b:
2155 case Intrinsic::loongarch_lasx_xvslei_h:
2156 case Intrinsic::loongarch_lasx_xvslei_w:
2157 case Intrinsic::loongarch_lasx_xvslei_d:
2158 case Intrinsic::loongarch_lasx_xvslti_b:
2159 case Intrinsic::loongarch_lasx_xvslti_h:
2160 case Intrinsic::loongarch_lasx_xvslti_w:
2161 case Intrinsic::loongarch_lasx_xvslti_d:
2162 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2163 case Intrinsic::loongarch_lsx_vsrlni_h_w:
2164 case Intrinsic::loongarch_lsx_vsrani_h_w:
2165 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2166 case Intrinsic::loongarch_lsx_vsrarni_h_w:
2167 case Intrinsic::loongarch_lsx_vssrlni_h_w:
2168 case Intrinsic::loongarch_lsx_vssrani_h_w:
2169 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2170 case Intrinsic::loongarch_lsx_vssrani_hu_w:
2171 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2172 case Intrinsic::loongarch_lsx_vssrarni_h_w:
2173 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2174 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2175 case Intrinsic::loongarch_lsx_vfrstpi_b:
2176 case Intrinsic::loongarch_lsx_vfrstpi_h:
2177 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2178 case Intrinsic::loongarch_lasx_xvsrani_h_w:
2179 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2180 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2181 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2182 case Intrinsic::loongarch_lasx_xvssrani_h_w:
2183 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2184 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2185 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2186 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2187 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2188 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2189 case Intrinsic::loongarch_lasx_xvfrstpi_b:
2190 case Intrinsic::loongarch_lasx_xvfrstpi_h:
2191 return checkIntrinsicImmArg<5>(Op, 3, DAG);
2192 case Intrinsic::loongarch_lsx_vsat_d:
2193 case Intrinsic::loongarch_lsx_vsat_du:
2194 case Intrinsic::loongarch_lsx_vrotri_d:
2195 case Intrinsic::loongarch_lsx_vsrlri_d:
2196 case Intrinsic::loongarch_lsx_vsrari_d:
2197 case Intrinsic::loongarch_lasx_xvsat_d:
2198 case Intrinsic::loongarch_lasx_xvsat_du:
2199 case Intrinsic::loongarch_lasx_xvrotri_d:
2200 case Intrinsic::loongarch_lasx_xvsrlri_d:
2201 case Intrinsic::loongarch_lasx_xvsrari_d:
2202 return checkIntrinsicImmArg<6>(Op, 2, DAG);
2203 case Intrinsic::loongarch_lsx_vsrlni_w_d:
2204 case Intrinsic::loongarch_lsx_vsrani_w_d:
2205 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2206 case Intrinsic::loongarch_lsx_vsrarni_w_d:
2207 case Intrinsic::loongarch_lsx_vssrlni_w_d:
2208 case Intrinsic::loongarch_lsx_vssrani_w_d:
2209 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2210 case Intrinsic::loongarch_lsx_vssrani_wu_d:
2211 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2212 case Intrinsic::loongarch_lsx_vssrarni_w_d:
2213 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2214 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2215 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2216 case Intrinsic::loongarch_lasx_xvsrani_w_d:
2217 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2218 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2219 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2220 case Intrinsic::loongarch_lasx_xvssrani_w_d:
2221 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2222 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2223 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2224 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2225 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2226 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2227 return checkIntrinsicImmArg<6>(Op, 3, DAG);
2228 case Intrinsic::loongarch_lsx_vsrlni_d_q:
2229 case Intrinsic::loongarch_lsx_vsrani_d_q:
2230 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2231 case Intrinsic::loongarch_lsx_vsrarni_d_q:
2232 case Intrinsic::loongarch_lsx_vssrlni_d_q:
2233 case Intrinsic::loongarch_lsx_vssrani_d_q:
2234 case Intrinsic::loongarch_lsx_vssrlni_du_q:
2235 case Intrinsic::loongarch_lsx_vssrani_du_q:
2236 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2237 case Intrinsic::loongarch_lsx_vssrarni_d_q:
2238 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2239 case Intrinsic::loongarch_lsx_vssrarni_du_q:
2240 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2241 case Intrinsic::loongarch_lasx_xvsrani_d_q:
2242 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2243 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2244 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2245 case Intrinsic::loongarch_lasx_xvssrani_d_q:
2246 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2247 case Intrinsic::loongarch_lasx_xvssrani_du_q:
2248 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2249 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2250 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2251 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2252 return checkIntrinsicImmArg<7>(Op, 3, DAG);
2253 case Intrinsic::loongarch_lsx_vnori_b:
2254 case Intrinsic::loongarch_lsx_vshuf4i_b:
2255 case Intrinsic::loongarch_lsx_vshuf4i_h:
2256 case Intrinsic::loongarch_lsx_vshuf4i_w:
2257 case Intrinsic::loongarch_lasx_xvnori_b:
2258 case Intrinsic::loongarch_lasx_xvshuf4i_b:
2259 case Intrinsic::loongarch_lasx_xvshuf4i_h:
2260 case Intrinsic::loongarch_lasx_xvshuf4i_w:
2261 case Intrinsic::loongarch_lasx_xvpermi_d:
2262 return checkIntrinsicImmArg<8>(Op, 2, DAG);
2263 case Intrinsic::loongarch_lsx_vshuf4i_d:
2264 case Intrinsic::loongarch_lsx_vpermi_w:
2265 case Intrinsic::loongarch_lsx_vbitseli_b:
2266 case Intrinsic::loongarch_lsx_vextrins_b:
2267 case Intrinsic::loongarch_lsx_vextrins_h:
2268 case Intrinsic::loongarch_lsx_vextrins_w:
2269 case Intrinsic::loongarch_lsx_vextrins_d:
2270 case Intrinsic::loongarch_lasx_xvshuf4i_d:
2271 case Intrinsic::loongarch_lasx_xvpermi_w:
2272 case Intrinsic::loongarch_lasx_xvpermi_q:
2273 case Intrinsic::loongarch_lasx_xvbitseli_b:
2274 case Intrinsic::loongarch_lasx_xvextrins_b:
2275 case Intrinsic::loongarch_lasx_xvextrins_h:
2276 case Intrinsic::loongarch_lasx_xvextrins_w:
2277 case Intrinsic::loongarch_lasx_xvextrins_d:
2278 return checkIntrinsicImmArg<8>(Op, 3, DAG);
2279 case Intrinsic::loongarch_lsx_vrepli_b:
2280 case Intrinsic::loongarch_lsx_vrepli_h:
2281 case Intrinsic::loongarch_lsx_vrepli_w:
2282 case Intrinsic::loongarch_lsx_vrepli_d:
2283 case Intrinsic::loongarch_lasx_xvrepli_b:
2284 case Intrinsic::loongarch_lasx_xvrepli_h:
2285 case Intrinsic::loongarch_lasx_xvrepli_w:
2286 case Intrinsic::loongarch_lasx_xvrepli_d:
2287 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2288 case Intrinsic::loongarch_lsx_vldi:
2289 case Intrinsic::loongarch_lasx_xvldi:
2290 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
2291 }
2292}
2293
2294 // Helper function that emits an error message for intrinsics with a chain and
2295 // returns the merge values of a UNDEF and the chain.
2296 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2297 StringRef ErrorMsg,
2298 SelectionDAG &DAG) {
2299 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2300 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
2301 SDLoc(Op));
2302}
2303
2304SDValue
2305LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2306 SelectionDAG &DAG) const {
2307 SDLoc DL(Op);
2308 MVT GRLenVT = Subtarget.getGRLenVT();
2309 EVT VT = Op.getValueType();
2310 SDValue Chain = Op.getOperand(0);
2311 const StringRef ErrorMsgOOR = "argument out of range";
2312 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2313 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2314
2315 switch (Op.getConstantOperandVal(1)) {
2316 default:
2317 return Op;
2318 case Intrinsic::loongarch_crc_w_b_w:
2319 case Intrinsic::loongarch_crc_w_h_w:
2320 case Intrinsic::loongarch_crc_w_w_w:
2321 case Intrinsic::loongarch_crc_w_d_w:
2322 case Intrinsic::loongarch_crcc_w_b_w:
2323 case Intrinsic::loongarch_crcc_w_h_w:
2324 case Intrinsic::loongarch_crcc_w_w_w:
2325 case Intrinsic::loongarch_crcc_w_d_w:
2326 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2327 case Intrinsic::loongarch_csrrd_w:
2328 case Intrinsic::loongarch_csrrd_d: {
2329 unsigned Imm = Op.getConstantOperandVal(2);
2330 return !isUInt<14>(Imm)
2331 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2332 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2333 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2334 }
2335 case Intrinsic::loongarch_csrwr_w:
2336 case Intrinsic::loongarch_csrwr_d: {
2337 unsigned Imm = Op.getConstantOperandVal(3);
2338 return !isUInt<14>(Imm)
2339 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2340 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2341 {Chain, Op.getOperand(2),
2342 DAG.getConstant(Imm, DL, GRLenVT)});
2343 }
2344 case Intrinsic::loongarch_csrxchg_w:
2345 case Intrinsic::loongarch_csrxchg_d: {
2346 unsigned Imm = Op.getConstantOperandVal(4);
2347 return !isUInt<14>(Imm)
2348 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2349 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2350 {Chain, Op.getOperand(2), Op.getOperand(3),
2351 DAG.getConstant(Imm, DL, GRLenVT)});
2352 }
2353 case Intrinsic::loongarch_iocsrrd_d: {
2354 return DAG.getNode(
2355 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2356 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2357 }
2358#define IOCSRRD_CASE(NAME, NODE) \
2359 case Intrinsic::loongarch_##NAME: { \
2360 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
2361 {Chain, Op.getOperand(2)}); \
2362 }
2363 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2364 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2365 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2366#undef IOCSRRD_CASE
2367 case Intrinsic::loongarch_cpucfg: {
2368 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2369 {Chain, Op.getOperand(2)});
2370 }
2371 case Intrinsic::loongarch_lddir_d: {
2372 unsigned Imm = Op.getConstantOperandVal(3);
2373 return !isUInt<8>(Imm)
2374 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2375 : Op;
2376 }
2377 case Intrinsic::loongarch_movfcsr2gr: {
2378 if (!Subtarget.hasBasicF())
2379 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2380 unsigned Imm = Op.getConstantOperandVal(2);
2381 return !isUInt<2>(Imm)
2382 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2383 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2384 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2385 }
2386 case Intrinsic::loongarch_lsx_vld:
2387 case Intrinsic::loongarch_lsx_vldrepl_b:
2388 case Intrinsic::loongarch_lasx_xvld:
2389 case Intrinsic::loongarch_lasx_xvldrepl_b:
2390 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2391 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2392 : SDValue();
2393 case Intrinsic::loongarch_lsx_vldrepl_h:
2394 case Intrinsic::loongarch_lasx_xvldrepl_h:
2395 return !isShiftedInt<11, 1>(
2396 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2398 Op, "argument out of range or not a multiple of 2", DAG)
2399 : SDValue();
2400 case Intrinsic::loongarch_lsx_vldrepl_w:
2401 case Intrinsic::loongarch_lasx_xvldrepl_w:
2402 return !isShiftedInt<10, 2>(
2403 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2405 Op, "argument out of range or not a multiple of 4", DAG)
2406 : SDValue();
2407 case Intrinsic::loongarch_lsx_vldrepl_d:
2408 case Intrinsic::loongarch_lasx_xvldrepl_d:
2409 return !isShiftedInt<9, 3>(
2410 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2412 Op, "argument out of range or not a multiple of 8", DAG)
2413 : SDValue();
2414 }
2415}
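// For example, loongarch_lsx_vldrepl_w requires its offset operand to pass
// isShiftedInt<10, 2>, i.e. a multiple of 4 in the range [-2048, 2044];
// other values report "argument out of range or not a multiple of 4".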
2416
2417 // Helper function that emits an error message for intrinsics with a void return
2418 // value and returns the chain.
2419 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2420 SelectionDAG &DAG) {
2421
2422 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2423 return Op.getOperand(0);
2424}
2425
2426SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2427 SelectionDAG &DAG) const {
2428 SDLoc DL(Op);
2429 MVT GRLenVT = Subtarget.getGRLenVT();
2430 SDValue Chain = Op.getOperand(0);
2431 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2432 SDValue Op2 = Op.getOperand(2);
2433 const StringRef ErrorMsgOOR = "argument out of range";
2434 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2435 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2436 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2437
2438 switch (IntrinsicEnum) {
2439 default:
2440 // TODO: Add more Intrinsics.
2441 return SDValue();
2442 case Intrinsic::loongarch_cacop_d:
2443 case Intrinsic::loongarch_cacop_w: {
2444 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2445 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2446 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2447 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2448 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2449 unsigned Imm1 = Op2->getAsZExtVal();
2450 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2451 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2452 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2453 return Op;
2454 }
2455 case Intrinsic::loongarch_dbar: {
2456 unsigned Imm = Op2->getAsZExtVal();
2457 return !isUInt<15>(Imm)
2458 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2459 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2460 DAG.getConstant(Imm, DL, GRLenVT));
2461 }
2462 case Intrinsic::loongarch_ibar: {
2463 unsigned Imm = Op2->getAsZExtVal();
2464 return !isUInt<15>(Imm)
2465 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2466 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2467 DAG.getConstant(Imm, DL, GRLenVT));
2468 }
2469 case Intrinsic::loongarch_break: {
2470 unsigned Imm = Op2->getAsZExtVal();
2471 return !isUInt<15>(Imm)
2472 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2473 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2474 DAG.getConstant(Imm, DL, GRLenVT));
2475 }
2476 case Intrinsic::loongarch_movgr2fcsr: {
2477 if (!Subtarget.hasBasicF())
2478 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2479 unsigned Imm = Op2->getAsZExtVal();
2480 return !isUInt<2>(Imm)
2481 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2482 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2483 DAG.getConstant(Imm, DL, GRLenVT),
2484 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2485 Op.getOperand(3)));
2486 }
2487 case Intrinsic::loongarch_syscall: {
2488 unsigned Imm = Op2->getAsZExtVal();
2489 return !isUInt<15>(Imm)
2490 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2491 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2492 DAG.getConstant(Imm, DL, GRLenVT));
2493 }
2494#define IOCSRWR_CASE(NAME, NODE) \
2495 case Intrinsic::loongarch_##NAME: { \
2496 SDValue Op3 = Op.getOperand(3); \
2497 return Subtarget.is64Bit() \
2498 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
2499 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2500 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
2501 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
2502 Op3); \
2503 }
2504 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2505 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2506 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2507#undef IOCSRWR_CASE
2508 case Intrinsic::loongarch_iocsrwr_d: {
2509 return !Subtarget.is64Bit()
2510 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2511 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2512 Op2,
2513 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2514 Op.getOperand(3)));
2515 }
2516#define ASRT_LE_GT_CASE(NAME) \
2517 case Intrinsic::loongarch_##NAME: { \
2518 return !Subtarget.is64Bit() \
2519 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
2520 : Op; \
2521 }
2522 ASRT_LE_GT_CASE(asrtle_d)
2523 ASRT_LE_GT_CASE(asrtgt_d)
2524#undef ASRT_LE_GT_CASE
2525 case Intrinsic::loongarch_ldpte_d: {
2526 unsigned Imm = Op.getConstantOperandVal(3);
2527 return !Subtarget.is64Bit()
2528 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2529 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2530 : Op;
2531 }
2532 case Intrinsic::loongarch_lsx_vst:
2533 case Intrinsic::loongarch_lasx_xvst:
2534 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2535 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2536 : SDValue();
2537 case Intrinsic::loongarch_lasx_xvstelm_b:
2538 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2539 !isUInt<5>(Op.getConstantOperandVal(5)))
2540 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2541 : SDValue();
2542 case Intrinsic::loongarch_lsx_vstelm_b:
2543 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2544 !isUInt<4>(Op.getConstantOperandVal(5)))
2545 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2546 : SDValue();
2547 case Intrinsic::loongarch_lasx_xvstelm_h:
2548 return (!isShiftedInt<8, 1>(
2549 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2550 !isUInt<4>(Op.getConstantOperandVal(5)))
2552 Op, "argument out of range or not a multiple of 2", DAG)
2553 : SDValue();
2554 case Intrinsic::loongarch_lsx_vstelm_h:
2555 return (!isShiftedInt<8, 1>(
2556 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2557 !isUInt<3>(Op.getConstantOperandVal(5)))
2559 Op, "argument out of range or not a multiple of 2", DAG)
2560 : SDValue();
2561 case Intrinsic::loongarch_lasx_xvstelm_w:
2562 return (!isShiftedInt<8, 2>(
2563 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2564 !isUInt<3>(Op.getConstantOperandVal(5)))
2566 Op, "argument out of range or not a multiple of 4", DAG)
2567 : SDValue();
2568 case Intrinsic::loongarch_lsx_vstelm_w:
2569 return (!isShiftedInt<8, 2>(
2570 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2571 !isUInt<2>(Op.getConstantOperandVal(5)))
2573 Op, "argument out of range or not a multiple of 4", DAG)
2574 : SDValue();
2575 case Intrinsic::loongarch_lasx_xvstelm_d:
2576 return (!isShiftedInt<8, 3>(
2577 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2578 !isUInt<2>(Op.getConstantOperandVal(5)))
2580 Op, "argument out of range or not a multiple of 8", DAG)
2581 : SDValue();
2582 case Intrinsic::loongarch_lsx_vstelm_d:
2583 return (!isShiftedInt<8, 3>(
2584 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2585 !isUInt<1>(Op.getConstantOperandVal(5)))
2587 Op, "argument out of range or not a multiple of 8", DAG)
2588 : SDValue();
2589 }
2590}
2591
2592SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2593 SelectionDAG &DAG) const {
2594 SDLoc DL(Op);
2595 SDValue Lo = Op.getOperand(0);
2596 SDValue Hi = Op.getOperand(1);
2597 SDValue Shamt = Op.getOperand(2);
2598 EVT VT = Lo.getValueType();
2599
2600 // if Shamt-GRLen < 0: // Shamt < GRLen
2601 // Lo = Lo << Shamt
2602 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2603 // else:
2604 // Lo = 0
2605 // Hi = Lo << (Shamt-GRLen)
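// For example, on LA32 (GRLen == 32) a shift amount of 40 takes the else
// branch: Lo becomes 0 and Hi becomes the incoming Lo shifted left by 8.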
2606
2607 SDValue Zero = DAG.getConstant(0, DL, VT);
2608 SDValue One = DAG.getConstant(1, DL, VT);
2609 SDValue MinusGRLen =
2610 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2611 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2612 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2613 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2614
2615 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2616 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2617 SDValue ShiftRightLo =
2618 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2619 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2620 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2621 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2622
2623 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2624
2625 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2626 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2627
2628 SDValue Parts[2] = {Lo, Hi};
2629 return DAG.getMergeValues(Parts, DL);
2630}
2631
2632SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2633 SelectionDAG &DAG,
2634 bool IsSRA) const {
2635 SDLoc DL(Op);
2636 SDValue Lo = Op.getOperand(0);
2637 SDValue Hi = Op.getOperand(1);
2638 SDValue Shamt = Op.getOperand(2);
2639 EVT VT = Lo.getValueType();
2640
2641 // SRA expansion:
2642 // if Shamt-GRLen < 0: // Shamt < GRLen
2643 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2644 // Hi = Hi >>s Shamt
2645 // else:
2646 // Lo = Hi >>s (Shamt-GRLen);
2647 // Hi = Hi >>s (GRLen-1)
2648 //
2649 // SRL expansion:
2650 // if Shamt-GRLen < 0: // Shamt < GRLen
2651 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2652 // Hi = Hi >>u Shamt
2653 // else:
2654 // Lo = Hi >>u (Shamt-GRLen);
2655 // Hi = 0;
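// For example, an i64 logical shift right by 40 on LA32 (GRLen == 32) takes
// the else branch: Lo becomes Hi >>u 8 and Hi becomes 0.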
2656
2657 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2658
2659 SDValue Zero = DAG.getConstant(0, DL, VT);
2660 SDValue One = DAG.getConstant(1, DL, VT);
2661 SDValue MinusGRLen =
2662 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2663 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2664 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2665 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2666
2667 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2668 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2669 SDValue ShiftLeftHi =
2670 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2671 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2672 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2673 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2674 SDValue HiFalse =
2675 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2676
2677 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2678
2679 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2680 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2681
2682 SDValue Parts[2] = {Lo, Hi};
2683 return DAG.getMergeValues(Parts, DL);
2684}
2685
2686// Returns the opcode of the target-specific SDNode that implements the 32-bit
2687// form of the given Opcode.
2688 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2689 switch (Opcode) {
2690 default:
2691 llvm_unreachable("Unexpected opcode");
2692 case ISD::SDIV:
2693 return LoongArchISD::DIV_W;
2694 case ISD::UDIV:
2695 return LoongArchISD::DIV_WU;
2696 case ISD::SREM:
2697 return LoongArchISD::MOD_W;
2698 case ISD::UREM:
2699 return LoongArchISD::MOD_WU;
2700 case ISD::SHL:
2701 return LoongArchISD::SLL_W;
2702 case ISD::SRA:
2703 return LoongArchISD::SRA_W;
2704 case ISD::SRL:
2705 return LoongArchISD::SRL_W;
2706 case ISD::ROTL:
2707 case ISD::ROTR:
2708 return LoongArchISD::ROTR_W;
2709 case ISD::CTTZ:
2710 return LoongArchISD::CTZ_W;
2711 case ISD::CTLZ:
2712 return LoongArchISD::CLZ_W;
2713 }
2714}
2715
2716// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2717// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
2718 // otherwise be promoted to i64, making it difficult to select the
2719 // SLL_W/.../*W instructions later, because the fact that the operation was
2720 // originally of type i8/i16/i32 is lost.
2721 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2722 unsigned ExtOpc = ISD::ANY_EXTEND) {
2723 SDLoc DL(N);
2724 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2725 SDValue NewOp0, NewRes;
2726
2727 switch (NumOp) {
2728 default:
2729 llvm_unreachable("Unexpected NumOp");
2730 case 1: {
2731 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2732 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2733 break;
2734 }
2735 case 2: {
2736 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2737 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2738 if (N->getOpcode() == ISD::ROTL) {
2739 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2740 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2741 }
2742 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2743 break;
2744 }
2745 // TODO: Handle more NumOp.
2746 }
2747
2748 // ReplaceNodeResults requires we maintain the same type for the return
2749 // value.
2750 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2751}
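// For example, on LA64 an i32 (srl %a, %b) with a non-constant shift amount is
// rebuilt here as (trunc (SRL_W (any_ext %a), (any_ext %b))), preserving the
// information needed to select the 32-bit srl.w instruction.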
2752
2753 // Converts the given 32-bit operation to an i64 operation with sign-extension
2754 // semantics to reduce the number of sign-extension instructions.
2755 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2756 SDLoc DL(N);
2757 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2758 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2759 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2760 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2761 DAG.getValueType(MVT::i32));
2762 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2763}
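// For example, an i32 add on LA64 becomes
// (trunc (sext_inreg (add (any_ext %a), (any_ext %b)), i32)),
// which can be selected as add.w without a separate sign-extension of the
// result.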
2764
2765 // Helper function that emits an error message for intrinsics with/without a
2766 // chain and returns a UNDEF, or a UNDEF and the chain, as the results.
2767 static void emitErrorAndReplaceIntrinsicResults(
2768 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2769 StringRef ErrorMsg, bool WithChain = true) {
2770 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2771 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2772 if (!WithChain)
2773 return;
2774 Results.push_back(N->getOperand(0));
2775}
2776
2777template <unsigned N>
2778static void
2779 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2780 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2781 unsigned ResOp) {
2782 const StringRef ErrorMsgOOR = "argument out of range";
2783 unsigned Imm = Node->getConstantOperandVal(2);
2784 if (!isUInt<N>(Imm)) {
2785 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2786 /*WithChain=*/false);
2787 return;
2788 }
2789 SDLoc DL(Node);
2790 SDValue Vec = Node->getOperand(1);
2791
2792 SDValue PickElt =
2793 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2794 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2796 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
2797 PickElt.getValue(0)));
2798}
2799
2800 static void
2801 replaceVecCondBranchResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2802 SelectionDAG &DAG,
2803 const LoongArchSubtarget &Subtarget,
2804 unsigned ResOp) {
2805 SDLoc DL(N);
2806 SDValue Vec = N->getOperand(1);
2807
2808 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2809 Results.push_back(
2810 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2811}
2812
2813static void
2814 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2815 SelectionDAG &DAG,
2816 const LoongArchSubtarget &Subtarget) {
2817 switch (N->getConstantOperandVal(0)) {
2818 default:
2819 llvm_unreachable("Unexpected Intrinsic.");
2820 case Intrinsic::loongarch_lsx_vpickve2gr_b:
2821 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2822 LoongArchISD::VPICK_SEXT_ELT);
2823 break;
2824 case Intrinsic::loongarch_lsx_vpickve2gr_h:
2825 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2826 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2827 LoongArchISD::VPICK_SEXT_ELT);
2828 break;
2829 case Intrinsic::loongarch_lsx_vpickve2gr_w:
2830 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2831 LoongArchISD::VPICK_SEXT_ELT);
2832 break;
2833 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2834 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2835 LoongArchISD::VPICK_ZEXT_ELT);
2836 break;
2837 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2838 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2839 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2840 LoongArchISD::VPICK_ZEXT_ELT);
2841 break;
2842 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2843 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2844 LoongArchISD::VPICK_ZEXT_ELT);
2845 break;
2846 case Intrinsic::loongarch_lsx_bz_b:
2847 case Intrinsic::loongarch_lsx_bz_h:
2848 case Intrinsic::loongarch_lsx_bz_w:
2849 case Intrinsic::loongarch_lsx_bz_d:
2850 case Intrinsic::loongarch_lasx_xbz_b:
2851 case Intrinsic::loongarch_lasx_xbz_h:
2852 case Intrinsic::loongarch_lasx_xbz_w:
2853 case Intrinsic::loongarch_lasx_xbz_d:
2854 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2855 LoongArchISD::VANY_ZERO);
2856 break;
2857 case Intrinsic::loongarch_lsx_bz_v:
2858 case Intrinsic::loongarch_lasx_xbz_v:
2859 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2860 LoongArchISD::VALL_ZERO);
2861 break;
2862 case Intrinsic::loongarch_lsx_bnz_b:
2863 case Intrinsic::loongarch_lsx_bnz_h:
2864 case Intrinsic::loongarch_lsx_bnz_w:
2865 case Intrinsic::loongarch_lsx_bnz_d:
2866 case Intrinsic::loongarch_lasx_xbnz_b:
2867 case Intrinsic::loongarch_lasx_xbnz_h:
2868 case Intrinsic::loongarch_lasx_xbnz_w:
2869 case Intrinsic::loongarch_lasx_xbnz_d:
2870 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2871 LoongArchISD::VALL_NONZERO);
2872 break;
2873 case Intrinsic::loongarch_lsx_bnz_v:
2874 case Intrinsic::loongarch_lasx_xbnz_v:
2875 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2876 LoongArchISD::VANY_NONZERO);
2877 break;
2878 }
2879}
2880
2881 void LoongArchTargetLowering::ReplaceNodeResults(
2882 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2883 SDLoc DL(N);
2884 EVT VT = N->getValueType(0);
2885 switch (N->getOpcode()) {
2886 default:
2887 llvm_unreachable("Don't know how to legalize this operation");
2888 case ISD::ADD:
2889 case ISD::SUB:
2890 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2891 "Unexpected custom legalisation");
2892 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2893 break;
2894 case ISD::SDIV:
2895 case ISD::UDIV:
2896 case ISD::SREM:
2897 case ISD::UREM:
2898 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2899 "Unexpected custom legalisation");
2900 Results.push_back(customLegalizeToWOp(N, DAG, 2,
2901 Subtarget.hasDiv32() && VT == MVT::i32
2902 ? ISD::ANY_EXTEND
2903 : ISD::SIGN_EXTEND));
2904 break;
2905 case ISD::SHL:
2906 case ISD::SRA:
2907 case ISD::SRL:
2908 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2909 "Unexpected custom legalisation");
2910 if (N->getOperand(1).getOpcode() != ISD::Constant) {
2911 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2912 break;
2913 }
2914 break;
2915 case ISD::ROTL:
2916 case ISD::ROTR:
2917 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2918 "Unexpected custom legalisation");
2919 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2920 break;
2921 case ISD::FP_TO_SINT: {
2922 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2923 "Unexpected custom legalisation");
2924 SDValue Src = N->getOperand(0);
2925 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2926 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2927 TargetLowering::TypeSoftenFloat) {
2928 if (Src.getValueType() == MVT::f16)
2929 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
2930 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
2931 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
2932 return;
2933 }
2934 // If the FP type needs to be softened, emit a library call using the 'si'
2935 // version. If we left it to default legalization we'd end up with 'di'.
2936 RTLIB::Libcall LC;
2937 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
2938 MakeLibCallOptions CallOptions;
2939 EVT OpVT = Src.getValueType();
2940 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
2941 SDValue Chain = SDValue();
2942 SDValue Result;
2943 std::tie(Result, Chain) =
2944 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
2945 Results.push_back(Result);
2946 break;
2947 }
2948 case ISD::BITCAST: {
2949 SDValue Src = N->getOperand(0);
2950 EVT SrcVT = Src.getValueType();
2951 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
2952 Subtarget.hasBasicF()) {
2953 SDValue Dst =
2954 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
2955 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
2956 }
2957 break;
2958 }
2959 case ISD::FP_TO_UINT: {
2960 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2961 "Unexpected custom legalisation");
2962 auto &TLI = DAG.getTargetLoweringInfo();
2963 SDValue Tmp1, Tmp2;
2964 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
2965 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
2966 break;
2967 }
2968 case ISD::BSWAP: {
2969 SDValue Src = N->getOperand(0);
2970 assert((VT == MVT::i16 || VT == MVT::i32) &&
2971 "Unexpected custom legalization");
2972 MVT GRLenVT = Subtarget.getGRLenVT();
2973 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2974 SDValue Tmp;
2975 switch (VT.getSizeInBits()) {
2976 default:
2977 llvm_unreachable("Unexpected operand width");
2978 case 16:
2979 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
2980 break;
2981 case 32:
2982 // Only LA64 will get here, due to the size mismatch between VT and
2983 // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
2984 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
2985 break;
2986 }
2987 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2988 break;
2989 }
2990 case ISD::BITREVERSE: {
2991 SDValue Src = N->getOperand(0);
2992 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
2993 "Unexpected custom legalization");
2994 MVT GRLenVT = Subtarget.getGRLenVT();
2995 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2996 SDValue Tmp;
2997 switch (VT.getSizeInBits()) {
2998 default:
2999 llvm_unreachable("Unexpected operand width");
3000 case 8:
3001 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
3002 break;
3003 case 32:
3004 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
3005 break;
3006 }
3007 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
3008 break;
3009 }
3010 case ISD::CTLZ:
3011 case ISD::CTTZ: {
3012 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3013 "Unexpected custom legalisation");
3014 Results.push_back(customLegalizeToWOp(N, DAG, 1));
3015 break;
3016 }
3017 case ISD::INTRINSIC_W_CHAIN: {
3018 SDValue Chain = N->getOperand(0);
3019 SDValue Op2 = N->getOperand(2);
3020 MVT GRLenVT = Subtarget.getGRLenVT();
3021 const StringRef ErrorMsgOOR = "argument out of range";
3022 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3023 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3024
3025 switch (N->getConstantOperandVal(1)) {
3026 default:
3027 llvm_unreachable("Unexpected Intrinsic.");
3028 case Intrinsic::loongarch_movfcsr2gr: {
3029 if (!Subtarget.hasBasicF()) {
3030 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
3031 return;
3032 }
3033 unsigned Imm = Op2->getAsZExtVal();
3034 if (!isUInt<2>(Imm)) {
3035 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3036 return;
3037 }
3038 SDValue MOVFCSR2GRResults = DAG.getNode(
3039 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
3040 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3041 Results.push_back(
3042 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
3043 Results.push_back(MOVFCSR2GRResults.getValue(1));
3044 break;
3045 }
3046#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
3047 case Intrinsic::loongarch_##NAME: { \
3048 SDValue NODE = DAG.getNode( \
3049 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3050 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3051 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3052 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3053 Results.push_back(NODE.getValue(1)); \
3054 break; \
3055 }
3056 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
3057 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
3058 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
3059 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
3060 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
3061 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
3062#undef CRC_CASE_EXT_BINARYOP
3063
3064#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
3065 case Intrinsic::loongarch_##NAME: { \
3066 SDValue NODE = DAG.getNode( \
3067 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3068 {Chain, Op2, \
3069 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3070 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3071 Results.push_back(NODE.getValue(1)); \
3072 break; \
3073 }
3074 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
3075 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
3076#undef CRC_CASE_EXT_UNARYOP
3077#define CSR_CASE(ID) \
3078 case Intrinsic::loongarch_##ID: { \
3079 if (!Subtarget.is64Bit()) \
3080 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
3081 break; \
3082 }
3083 CSR_CASE(csrrd_d);
3084 CSR_CASE(csrwr_d);
3085 CSR_CASE(csrxchg_d);
3086 CSR_CASE(iocsrrd_d);
3087#undef CSR_CASE
3088 case Intrinsic::loongarch_csrrd_w: {
3089 unsigned Imm = Op2->getAsZExtVal();
3090 if (!isUInt<14>(Imm)) {
3091 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3092 return;
3093 }
3094 SDValue CSRRDResults =
3095 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3096 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3097 Results.push_back(
3098 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3099 Results.push_back(CSRRDResults.getValue(1));
3100 break;
3101 }
3102 case Intrinsic::loongarch_csrwr_w: {
3103 unsigned Imm = N->getConstantOperandVal(3);
3104 if (!isUInt<14>(Imm)) {
3105 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3106 return;
3107 }
3108 SDValue CSRWRResults =
3109 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3110 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3111 DAG.getConstant(Imm, DL, GRLenVT)});
3112 Results.push_back(
3113 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3114 Results.push_back(CSRWRResults.getValue(1));
3115 break;
3116 }
3117 case Intrinsic::loongarch_csrxchg_w: {
3118 unsigned Imm = N->getConstantOperandVal(4);
3119 if (!isUInt<14>(Imm)) {
3120 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3121 return;
3122 }
3123 SDValue CSRXCHGResults = DAG.getNode(
3124 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3125 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3126 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3127 DAG.getConstant(Imm, DL, GRLenVT)});
3128 Results.push_back(
3129 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3130 Results.push_back(CSRXCHGResults.getValue(1));
3131 break;
3132 }
3133#define IOCSRRD_CASE(NAME, NODE) \
3134 case Intrinsic::loongarch_##NAME: { \
3135 SDValue IOCSRRDResults = \
3136 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3137 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3138 Results.push_back( \
3139 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
3140 Results.push_back(IOCSRRDResults.getValue(1)); \
3141 break; \
3142 }
3143 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3144 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3145 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3146#undef IOCSRRD_CASE
3147 case Intrinsic::loongarch_cpucfg: {
3148 SDValue CPUCFGResults =
3149 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3150 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3151 Results.push_back(
3152 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3153 Results.push_back(CPUCFGResults.getValue(1));
3154 break;
3155 }
3156 case Intrinsic::loongarch_lddir_d: {
3157 if (!Subtarget.is64Bit()) {
3158 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3159 return;
3160 }
3161 break;
3162 }
3163 }
3164 break;
3165 }
3166 case ISD::READ_REGISTER: {
3167 if (Subtarget.is64Bit())
3168 DAG.getContext()->emitError(
3169 "On LA64, only 64-bit registers can be read.");
3170 else
3171 DAG.getContext()->emitError(
3172 "On LA32, only 32-bit registers can be read.");
3173 Results.push_back(DAG.getUNDEF(VT));
3174 Results.push_back(N->getOperand(0));
3175 break;
3176 }
3177 case ISD::INTRINSIC_WO_CHAIN: {
3178 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3179 break;
3180 }
3181 case ISD::LROUND: {
3182 SDValue Op0 = N->getOperand(0);
3183 EVT OpVT = Op0.getValueType();
3184 RTLIB::Libcall LC =
3185 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
3186 MakeLibCallOptions CallOptions;
3187 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
3188 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
3189 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
3190 Results.push_back(Result);
3191 break;
3192 }
3193 }
3194}
3195
3196 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3197 TargetLowering::DAGCombinerInfo &DCI,
3198 const LoongArchSubtarget &Subtarget) {
3199 if (DCI.isBeforeLegalizeOps())
3200 return SDValue();
3201
3202 SDValue FirstOperand = N->getOperand(0);
3203 SDValue SecondOperand = N->getOperand(1);
3204 unsigned FirstOperandOpc = FirstOperand.getOpcode();
3205 EVT ValTy = N->getValueType(0);
3206 SDLoc DL(N);
3207 uint64_t lsb, msb;
3208 unsigned SMIdx, SMLen;
3209 ConstantSDNode *CN;
3210 SDValue NewOperand;
3211 MVT GRLenVT = Subtarget.getGRLenVT();
3212
3213 // Op's second operand must be a shifted mask.
3214 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3215 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3216 return SDValue();
3217
3218 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3219 // Pattern match BSTRPICK.
3220 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
3221 // => BSTRPICK $dst, $src, msb, lsb
3222 // where msb = lsb + len - 1
3223
3224 // The second operand of the shift must be an immediate.
3225 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3226 return SDValue();
3227
3228 lsb = CN->getZExtValue();
3229
3230 // Return if the shifted mask does not start at bit 0 or the sum of its
3231 // length and lsb exceeds the word's size.
3232 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3233 return SDValue();
3234
3235 NewOperand = FirstOperand.getOperand(0);
3236 } else {
3237 // Pattern match BSTRPICK.
3238 // $dst = and $src, (2**len - 1), if len > 12
3239 // => BSTRPICK $dst, $src, msb, lsb
3240 // where lsb = 0 and msb = len - 1
3241
3242 // If the mask is <= 0xfff, andi can be used instead.
3243 if (CN->getZExtValue() <= 0xfff)
3244 return SDValue();
3245
3246 // Return if the MSB would exceed the value type's size.
3247 if (SMIdx + SMLen > ValTy.getSizeInBits())
3248 return SDValue();
3249
3250 if (SMIdx > 0) {
3251 // Omit if the constant has more than 2 uses. This is a conservative
3252 // decision. Whether it is a win depends on the HW microarchitecture.
3253 // However it should always be better for 1 and 2 uses.
3254 if (CN->use_size() > 2)
3255 return SDValue();
3256 // Return if the constant can be composed by a single LU12I.W.
3257 if ((CN->getZExtValue() & 0xfff) == 0)
3258 return SDValue();
3259 // Return if the constant can be composed by a single ADDI with
3260 // the zero register.
3261 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3262 return SDValue();
3263 }
3264
3265 lsb = SMIdx;
3266 NewOperand = FirstOperand;
3267 }
3268
3269 msb = lsb + SMLen - 1;
3270 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3271 DAG.getConstant(msb, DL, GRLenVT),
3272 DAG.getConstant(lsb, DL, GRLenVT));
3273 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3274 return NR0;
3275 // Try to optimize to
3276 // bstrpick $Rd, $Rs, msb, lsb
3277 // slli $Rd, $Rd, lsb
3278 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3279 DAG.getConstant(lsb, DL, GRLenVT));
3280}
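// For example, (and (srl $src, 4), 255) has lsb == 4 and an 8-bit mask
// starting at bit 0, so the code above rewrites it to
// BSTRPICK $dst, $src, 11, 4 (msb = lsb + len - 1).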
3281
3282 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3283 TargetLowering::DAGCombinerInfo &DCI,
3284 const LoongArchSubtarget &Subtarget) {
3285 if (DCI.isBeforeLegalizeOps())
3286 return SDValue();
3287
3288 // $dst = srl (and $src, Mask), Shamt
3289 // =>
3290 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3291 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
3292 //
3293
3294 SDValue FirstOperand = N->getOperand(0);
3295 ConstantSDNode *CN;
3296 EVT ValTy = N->getValueType(0);
3297 SDLoc DL(N);
3298 MVT GRLenVT = Subtarget.getGRLenVT();
3299 unsigned MaskIdx, MaskLen;
3300 uint64_t Shamt;
3301
3302 // The first operand must be an AND and the second operand of the AND must be
3303 // a shifted mask.
3304 if (FirstOperand.getOpcode() != ISD::AND ||
3305 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3306 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3307 return SDValue();
3308
3309 // The second operand (shift amount) must be an immediate.
3310 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3311 return SDValue();
3312
3313 Shamt = CN->getZExtValue();
3314 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3315 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3316 FirstOperand->getOperand(0),
3317 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3318 DAG.getConstant(Shamt, DL, GRLenVT));
3319
3320 return SDValue();
3321}
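// For example, (srl (and $src, 0xff0), 4) has MaskIdx == 4, MaskLen == 8 and
// Shamt == 4, which satisfies MaskIdx <= Shamt <= MaskIdx + MaskLen - 1, so it
// becomes BSTRPICK $dst, $src, 11, 4.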
3322
3323 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3324 TargetLowering::DAGCombinerInfo &DCI,
3325 const LoongArchSubtarget &Subtarget) {
3326 MVT GRLenVT = Subtarget.getGRLenVT();
3327 EVT ValTy = N->getValueType(0);
3328 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3329 ConstantSDNode *CN0, *CN1;
3330 SDLoc DL(N);
3331 unsigned ValBits = ValTy.getSizeInBits();
3332 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3333 unsigned Shamt;
3334 bool SwapAndRetried = false;
3335
3336 if (DCI.isBeforeLegalizeOps())
3337 return SDValue();
3338
3339 if (ValBits != 32 && ValBits != 64)
3340 return SDValue();
3341
3342Retry:
3343 // 1st pattern to match BSTRINS:
3344 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
3345 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3346 // =>
3347 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
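// For example, with lsb == 8 and size == 8 (mask1 == 0xff00, mask0 == ~0xff00),
// (or (and X, ~0xff00), (and (shl Y, 8), 0xff00)) becomes BSTRINS X, Y, 15, 8.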
3348 if (N0.getOpcode() == ISD::AND &&
3349 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3350 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3351 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3352 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3353 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3354 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3355 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3356 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3357 (MaskIdx0 + MaskLen0 <= ValBits)) {
3358 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3359 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3360 N1.getOperand(0).getOperand(0),
3361 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3362 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3363 }
3364
3365 // 2nd pattern to match BSTRINS:
3366 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
3367 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3368 // =>
3369 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3370 if (N0.getOpcode() == ISD::AND &&
3371 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3372 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3373 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3374 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3375 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3376 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3377 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3378 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3379 (MaskIdx0 + MaskLen0 <= ValBits)) {
3380 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3381 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3382 N1.getOperand(0).getOperand(0),
3383 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3384 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3385 }
3386
3387 // 3rd pattern to match BSTRINS:
3388 // R = or (and X, mask0), (and Y, mask1)
3389 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3390 // =>
3391 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3392 // where msb = lsb + size - 1
3393 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3394 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3395 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3396 (MaskIdx0 + MaskLen0 <= 64) &&
3397 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3398 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3399 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3400 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3401 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3402 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3403 DAG.getConstant(ValBits == 32
3404 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3405 : (MaskIdx0 + MaskLen0 - 1),
3406 DL, GRLenVT),
3407 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3408 }
3409
3410 // 4th pattern to match BSTRINS:
3411 // R = or (and X, mask), (shl Y, shamt)
3412 // where mask = (2**shamt - 1)
3413 // =>
3414 // R = BSTRINS X, Y, ValBits - 1, shamt
3415 // where ValBits = 32 or 64
3416 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3417 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3418 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3419 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3420 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3421 (MaskIdx0 + MaskLen0 <= ValBits)) {
3422 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3423 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3424 N1.getOperand(0),
3425 DAG.getConstant((ValBits - 1), DL, GRLenVT),
3426 DAG.getConstant(Shamt, DL, GRLenVT));
3427 }
3428
3429 // 5th pattern to match BSTRINS:
3430 // R = or (and X, mask), const
3431 // where ~mask = (2**size - 1) << lsb, mask & const = 0
3432 // =>
3433 // R = BSTRINS X, (const >> lsb), msb, lsb
3434 // where msb = lsb + size - 1
3435 if (N0.getOpcode() == ISD::AND &&
3436 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3437 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3438 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3439 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3440 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3441 return DAG.getNode(
3442 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3443 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3444 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3445 : (MaskIdx0 + MaskLen0 - 1),
3446 DL, GRLenVT),
3447 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3448 }
3449
3450 // 6th pattern.
3451 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3452 // by the incoming bits are known to be zero.
3453 // =>
3454 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
3455 //
3456 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
3457 // pattern is more common than the 1st. So we put the 1st before the 6th in
3458 // order to match as many nodes as possible.
3459 ConstantSDNode *CNMask, *CNShamt;
3460 unsigned MaskIdx, MaskLen;
3461 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3462 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3463 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3464 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3465 CNShamt->getZExtValue() + MaskLen <= ValBits) {
3466 Shamt = CNShamt->getZExtValue();
3467 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3468 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3469 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3470 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3471 N1.getOperand(0).getOperand(0),
3472 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3473 DAG.getConstant(Shamt, DL, GRLenVT));
3474 }
3475 }
3476
3477 // 7th pattern.
3478 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3479 // overwritten by the incoming bits are known to be zero.
3480 // =>
3481 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
3482 //
3483 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3484 // before the 7th in order to match as many nodes as possible.
3485 if (N1.getOpcode() == ISD::AND &&
3486 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3487 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3488 N1.getOperand(0).getOpcode() == ISD::SHL &&
3489 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3490 CNShamt->getZExtValue() == MaskIdx) {
3491 APInt ShMask(ValBits, CNMask->getZExtValue());
3492 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3493 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3494 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3495 N1.getOperand(0).getOperand(0),
3496 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3497 DAG.getConstant(MaskIdx, DL, GRLenVT));
3498 }
3499 }
3500
3501 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3502 if (!SwapAndRetried) {
3503 std::swap(N0, N1);
3504 SwapAndRetried = true;
3505 goto Retry;
3506 }
3507
3508 SwapAndRetried = false;
3509Retry2:
3510 // 8th pattern.
3511 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3512 // the incoming bits are known to be zero.
3513 // =>
3514 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
3515 //
3516 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
3517 // we put it here in order to match as many nodes as possible or generate fewer
3518 // instructions.
3519 if (N1.getOpcode() == ISD::AND &&
3520 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3521 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3522 APInt ShMask(ValBits, CNMask->getZExtValue());
3523 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3524 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3525 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3526 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3527 N1->getOperand(0),
3528 DAG.getConstant(MaskIdx, DL, GRLenVT)),
3529 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3530 DAG.getConstant(MaskIdx, DL, GRLenVT));
3531 }
3532 }
3533 // Swap N0/N1 and retry.
3534 if (!SwapAndRetried) {
3535 std::swap(N0, N1);
3536 SwapAndRetried = true;
3537 goto Retry2;
3538 }
3539
3540 return SDValue();
3541}
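// Editor's note: a worked instance of the 1st BSTRINS pattern (illustrative
// values, not generated code). On LA64, take mask1 = 0xFF0 (bits [4, 11]) and
// mask0 = ~0xFF0, so that
//   R = or (and X, ~0xFF0), (and (shl Y, 4), 0xFF0)
// matches with MaskIdx0 = 4, MaskLen0 = 8 and Shamt = 4, and is rewritten as
//   R = BSTRINS X, Y, 11, 4      // msb = 4 + 8 - 1, lsb = 4
// replacing bits [4, 11] of X with the low 8 bits of Y.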
3542
3543static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3544 ExtType = ISD::NON_EXTLOAD;
3545
3546 switch (V.getNode()->getOpcode()) {
3547 case ISD::LOAD: {
3548 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3549 if ((LoadNode->getMemoryVT() == MVT::i8) ||
3550 (LoadNode->getMemoryVT() == MVT::i16)) {
3551 ExtType = LoadNode->getExtensionType();
3552 return true;
3553 }
3554 return false;
3555 }
3556 case ISD::AssertSext: {
3557 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3558 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3559 ExtType = ISD::SEXTLOAD;
3560 return true;
3561 }
3562 return false;
3563 }
3564 case ISD::AssertZext: {
3565 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3566 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3567 ExtType = ISD::ZEXTLOAD;
3568 return true;
3569 }
3570 return false;
3571 }
3572 default:
3573 return false;
3574 }
3575
3576 return false;
3577}
3578
3579// Eliminate redundant truncation and zero-extension nodes.
3580// * Case 1:
3581// +------------+ +------------+ +------------+
3582// | Input1 | | Input2 | | CC |
3583// +------------+ +------------+ +------------+
3584// | | |
3585// V V +----+
3586// +------------+ +------------+ |
3587// | TRUNCATE | | TRUNCATE | |
3588// +------------+ +------------+ |
3589// | | |
3590// V V |
3591// +------------+ +------------+ |
3592// | ZERO_EXT | | ZERO_EXT | |
3593// +------------+ +------------+ |
3594// | | |
3595// | +-------------+ |
3596// V V | |
3597// +----------------+ | |
3598// | AND | | |
3599// +----------------+ | |
3600// | | |
3601// +---------------+ | |
3602// | | |
3603// V V V
3604// +-------------+
3605// | CMP |
3606// +-------------+
3607// * Case 2:
3608// +------------+ +------------+ +-------------+ +------------+ +------------+
3609// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
3610// +------------+ +------------+ +-------------+ +------------+ +------------+
3611// | | | | |
3612// V | | | |
3613// +------------+ | | | |
3614// | XOR |<---------------------+ | |
3615// +------------+ | | |
3616// | | | |
3617// V V +---------------+ |
3618// +------------+ +------------+ | |
3619// | TRUNCATE | | TRUNCATE | | +-------------------------+
3620// +------------+ +------------+ | |
3621// | | | |
3622// V V | |
3623// +------------+ +------------+ | |
3624// | ZERO_EXT | | ZERO_EXT | | |
3625// +------------+ +------------+ | |
3626// | | | |
3627// V V | |
3628// +----------------+ | |
3629// | AND | | |
3630// +----------------+ | |
3631// | | |
3632// +---------------+ | |
3633// | | |
3634// V V V
3635// +-------------+
3636// | CMP |
3637// +-------------+
3638static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3639 TargetLowering::DAGCombinerInfo &DCI,
3640 const LoongArchSubtarget &Subtarget) {
3641 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3642
3643 SDNode *AndNode = N->getOperand(0).getNode();
3644 if (AndNode->getOpcode() != ISD::AND)
3645 return SDValue();
3646
3647 SDValue AndInputValue2 = AndNode->getOperand(1);
3648 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3649 return SDValue();
3650
3651 SDValue CmpInputValue = N->getOperand(1);
3652 SDValue AndInputValue1 = AndNode->getOperand(0);
3653 if (AndInputValue1.getOpcode() == ISD::XOR) {
3654 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3655 return SDValue();
3656 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3657 if (!CN || CN->getSExtValue() != -1)
3658 return SDValue();
3659 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3660 if (!CN || CN->getSExtValue() != 0)
3661 return SDValue();
3662 AndInputValue1 = AndInputValue1.getOperand(0);
3663 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3664 return SDValue();
3665 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3666 if (AndInputValue2 != CmpInputValue)
3667 return SDValue();
3668 } else {
3669 return SDValue();
3670 }
3671
3672 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3673 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3674 return SDValue();
3675
3676 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3677 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3678 return SDValue();
3679
3680 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3681 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3682 ISD::LoadExtType ExtType1;
3683 ISD::LoadExtType ExtType2;
3684
3685 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3686 !checkValueWidth(TruncInputValue2, ExtType2))
3687 return SDValue();
3688
3689 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3690 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3691 return SDValue();
3692
3693 if ((ExtType2 != ISD::ZEXTLOAD) &&
3694 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3695 return SDValue();
3696
3697 // These truncation and zero-extension nodes are unnecessary; remove them.
3698 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3699 TruncInputValue1, TruncInputValue2);
3700 SDValue NewSetCC =
3701 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3702 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3703 return SDValue(N, 0);
3704}
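// Editor's sketch of the Case 1 rewrite (assumed node shapes, illustration
// only):
//   setcc (and (zext (trunc A)), (zext (trunc B))), (zext (trunc B)), cc
// becomes
//   setcc (and A, B), B, cc
// once A and B are known to be narrow (i8/i16) extending loads, since the
// dropped trunc/zext pairs cannot change any bit the comparison can observe.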
3705
3706// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
3707static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3708 TargetLowering::DAGCombinerInfo &DCI,
3709 const LoongArchSubtarget &Subtarget) {
3710 if (DCI.isBeforeLegalizeOps())
3711 return SDValue();
3712
3713 SDValue Src = N->getOperand(0);
3714 if (Src.getOpcode() != LoongArchISD::REVB_2W)
3715 return SDValue();
3716
3717 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3718 Src.getOperand(0));
3719}
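// Editor's note on why this is valid: revb.2w reverses the bytes of the
// 32-bit value and bitrev.w then reverses all 32 bits, so the byte positions
// are reversed twice (and therefore restored) while the bits inside each byte
// are reversed once, which is exactly what bitrev.4b computes.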
3720
3721template <unsigned N>
3722static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3723 SelectionDAG &DAG,
3724 const LoongArchSubtarget &Subtarget,
3725 bool IsSigned = false) {
3726 SDLoc DL(Node);
3727 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3728 // Check the ImmArg.
3729 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3730 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3731 DAG.getContext()->emitError(Node->getOperationName(0) +
3732 ": argument out of range.");
3733 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3734 }
3735 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3736}
3737
3738template <unsigned N>
3739static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3740 SelectionDAG &DAG, bool IsSigned = false) {
3741 SDLoc DL(Node);
3742 EVT ResTy = Node->getValueType(0);
3743 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3744
3745 // Check the ImmArg.
3746 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3747 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3748 DAG.getContext()->emitError(Node->getOperationName(0) +
3749 ": argument out of range.");
3750 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3751 }
3752 return DAG.getConstant(
3753 APInt(ResTy.getScalarType().getSizeInBits(),
3754 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3755 DL, ResTy);
3756}
3757
3758static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3759 SDLoc DL(Node);
3760 EVT ResTy = Node->getValueType(0);
3761 SDValue Vec = Node->getOperand(2);
3762 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3763 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3764}
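// Editor's note (illustration): for a v4i32 shift the mask built above is a
// splat of 31, so each per-element shift amount is reduced modulo the element
// width before being handed to the generic SHL/SRL/SRA nodes below.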
3765
3766static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3767 SDLoc DL(Node);
3768 EVT ResTy = Node->getValueType(0);
3769 SDValue One = DAG.getConstant(1, DL, ResTy);
3770 SDValue Bit =
3771 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3772
3773 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3774 DAG.getNOT(DL, Bit, ResTy));
3775}
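// Editor's sketch: per element this computes elt & ~(1 << (bit % EltBits)).
// E.g. for v2i64 operands with a bit value of 3, Bit becomes the splat 0x8
// and the AND with its complement clears bit 3 of each element.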
3776
3777template <unsigned N>
3778static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3779 SDLoc DL(Node);
3780 EVT ResTy = Node->getValueType(0);
3781 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3782 // Check the unsigned ImmArg.
3783 if (!isUInt<N>(CImm->getZExtValue())) {
3784 DAG.getContext()->emitError(Node->getOperationName(0) +
3785 ": argument out of range.");
3786 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3787 }
3788
3789 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3790 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3791
3792 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3793}
3794
3795template <unsigned N>
3796static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3797 SDLoc DL(Node);
3798 EVT ResTy = Node->getValueType(0);
3799 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3800 // Check the unsigned ImmArg.
3801 if (!isUInt<N>(CImm->getZExtValue())) {
3802 DAG.getContext()->emitError(Node->getOperationName(0) +
3803 ": argument out of range.");
3804 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3805 }
3806
3807 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3808 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3809 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3810}
3811
3812template <unsigned N>
3813static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3814 SDLoc DL(Node);
3815 EVT ResTy = Node->getValueType(0);
3816 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3817 // Check the unsigned ImmArg.
3818 if (!isUInt<N>(CImm->getZExtValue())) {
3819 DAG.getContext()->emitError(Node->getOperationName(0) +
3820 ": argument out of range.");
3821 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3822 }
3823
3824 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3825 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3826 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3827}
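// Editor's note: the three *Imm helpers above share one scheme: validate the
// unsigned immediate against the element width, build the splat 1 << imm,
// then apply AND-with-complement (bit clear), OR (bit set) or XOR (bit
// reverse). For example, an immediate of 5 on 32-bit elements yields the
// splat 0x20, and the XOR form flips bit 5 of every element.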
3828
3829static SDValue
3830performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3831 TargetLowering::DAGCombinerInfo &DCI,
3832 const LoongArchSubtarget &Subtarget) {
3833 SDLoc DL(N);
3834 switch (N->getConstantOperandVal(0)) {
3835 default:
3836 break;
3837 case Intrinsic::loongarch_lsx_vadd_b:
3838 case Intrinsic::loongarch_lsx_vadd_h:
3839 case Intrinsic::loongarch_lsx_vadd_w:
3840 case Intrinsic::loongarch_lsx_vadd_d:
3841 case Intrinsic::loongarch_lasx_xvadd_b:
3842 case Intrinsic::loongarch_lasx_xvadd_h:
3843 case Intrinsic::loongarch_lasx_xvadd_w:
3844 case Intrinsic::loongarch_lasx_xvadd_d:
3845 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3846 N->getOperand(2));
3847 case Intrinsic::loongarch_lsx_vaddi_bu:
3848 case Intrinsic::loongarch_lsx_vaddi_hu:
3849 case Intrinsic::loongarch_lsx_vaddi_wu:
3850 case Intrinsic::loongarch_lsx_vaddi_du:
3851 case Intrinsic::loongarch_lasx_xvaddi_bu:
3852 case Intrinsic::loongarch_lasx_xvaddi_hu:
3853 case Intrinsic::loongarch_lasx_xvaddi_wu:
3854 case Intrinsic::loongarch_lasx_xvaddi_du:
3855 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3856 lowerVectorSplatImm<5>(N, 2, DAG));
3857 case Intrinsic::loongarch_lsx_vsub_b:
3858 case Intrinsic::loongarch_lsx_vsub_h:
3859 case Intrinsic::loongarch_lsx_vsub_w:
3860 case Intrinsic::loongarch_lsx_vsub_d:
3861 case Intrinsic::loongarch_lasx_xvsub_b:
3862 case Intrinsic::loongarch_lasx_xvsub_h:
3863 case Intrinsic::loongarch_lasx_xvsub_w:
3864 case Intrinsic::loongarch_lasx_xvsub_d:
3865 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3866 N->getOperand(2));
3867 case Intrinsic::loongarch_lsx_vsubi_bu:
3868 case Intrinsic::loongarch_lsx_vsubi_hu:
3869 case Intrinsic::loongarch_lsx_vsubi_wu:
3870 case Intrinsic::loongarch_lsx_vsubi_du:
3871 case Intrinsic::loongarch_lasx_xvsubi_bu:
3872 case Intrinsic::loongarch_lasx_xvsubi_hu:
3873 case Intrinsic::loongarch_lasx_xvsubi_wu:
3874 case Intrinsic::loongarch_lasx_xvsubi_du:
3875 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3876 lowerVectorSplatImm<5>(N, 2, DAG));
3877 case Intrinsic::loongarch_lsx_vneg_b:
3878 case Intrinsic::loongarch_lsx_vneg_h:
3879 case Intrinsic::loongarch_lsx_vneg_w:
3880 case Intrinsic::loongarch_lsx_vneg_d:
3881 case Intrinsic::loongarch_lasx_xvneg_b:
3882 case Intrinsic::loongarch_lasx_xvneg_h:
3883 case Intrinsic::loongarch_lasx_xvneg_w:
3884 case Intrinsic::loongarch_lasx_xvneg_d:
3885 return DAG.getNode(
3886 ISD::SUB, DL, N->getValueType(0),
3887 DAG.getConstant(
3888 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
3889 /*isSigned=*/true),
3890 SDLoc(N), N->getValueType(0)),
3891 N->getOperand(1));
3892 case Intrinsic::loongarch_lsx_vmax_b:
3893 case Intrinsic::loongarch_lsx_vmax_h:
3894 case Intrinsic::loongarch_lsx_vmax_w:
3895 case Intrinsic::loongarch_lsx_vmax_d:
3896 case Intrinsic::loongarch_lasx_xvmax_b:
3897 case Intrinsic::loongarch_lasx_xvmax_h:
3898 case Intrinsic::loongarch_lasx_xvmax_w:
3899 case Intrinsic::loongarch_lasx_xvmax_d:
3900 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3901 N->getOperand(2));
3902 case Intrinsic::loongarch_lsx_vmax_bu:
3903 case Intrinsic::loongarch_lsx_vmax_hu:
3904 case Intrinsic::loongarch_lsx_vmax_wu:
3905 case Intrinsic::loongarch_lsx_vmax_du:
3906 case Intrinsic::loongarch_lasx_xvmax_bu:
3907 case Intrinsic::loongarch_lasx_xvmax_hu:
3908 case Intrinsic::loongarch_lasx_xvmax_wu:
3909 case Intrinsic::loongarch_lasx_xvmax_du:
3910 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3911 N->getOperand(2));
3912 case Intrinsic::loongarch_lsx_vmaxi_b:
3913 case Intrinsic::loongarch_lsx_vmaxi_h:
3914 case Intrinsic::loongarch_lsx_vmaxi_w:
3915 case Intrinsic::loongarch_lsx_vmaxi_d:
3916 case Intrinsic::loongarch_lasx_xvmaxi_b:
3917 case Intrinsic::loongarch_lasx_xvmaxi_h:
3918 case Intrinsic::loongarch_lasx_xvmaxi_w:
3919 case Intrinsic::loongarch_lasx_xvmaxi_d:
3920 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3921 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3922 case Intrinsic::loongarch_lsx_vmaxi_bu:
3923 case Intrinsic::loongarch_lsx_vmaxi_hu:
3924 case Intrinsic::loongarch_lsx_vmaxi_wu:
3925 case Intrinsic::loongarch_lsx_vmaxi_du:
3926 case Intrinsic::loongarch_lasx_xvmaxi_bu:
3927 case Intrinsic::loongarch_lasx_xvmaxi_hu:
3928 case Intrinsic::loongarch_lasx_xvmaxi_wu:
3929 case Intrinsic::loongarch_lasx_xvmaxi_du:
3930 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3931 lowerVectorSplatImm<5>(N, 2, DAG));
3932 case Intrinsic::loongarch_lsx_vmin_b:
3933 case Intrinsic::loongarch_lsx_vmin_h:
3934 case Intrinsic::loongarch_lsx_vmin_w:
3935 case Intrinsic::loongarch_lsx_vmin_d:
3936 case Intrinsic::loongarch_lasx_xvmin_b:
3937 case Intrinsic::loongarch_lasx_xvmin_h:
3938 case Intrinsic::loongarch_lasx_xvmin_w:
3939 case Intrinsic::loongarch_lasx_xvmin_d:
3940 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3941 N->getOperand(2));
3942 case Intrinsic::loongarch_lsx_vmin_bu:
3943 case Intrinsic::loongarch_lsx_vmin_hu:
3944 case Intrinsic::loongarch_lsx_vmin_wu:
3945 case Intrinsic::loongarch_lsx_vmin_du:
3946 case Intrinsic::loongarch_lasx_xvmin_bu:
3947 case Intrinsic::loongarch_lasx_xvmin_hu:
3948 case Intrinsic::loongarch_lasx_xvmin_wu:
3949 case Intrinsic::loongarch_lasx_xvmin_du:
3950 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3951 N->getOperand(2));
3952 case Intrinsic::loongarch_lsx_vmini_b:
3953 case Intrinsic::loongarch_lsx_vmini_h:
3954 case Intrinsic::loongarch_lsx_vmini_w:
3955 case Intrinsic::loongarch_lsx_vmini_d:
3956 case Intrinsic::loongarch_lasx_xvmini_b:
3957 case Intrinsic::loongarch_lasx_xvmini_h:
3958 case Intrinsic::loongarch_lasx_xvmini_w:
3959 case Intrinsic::loongarch_lasx_xvmini_d:
3960 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3961 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3962 case Intrinsic::loongarch_lsx_vmini_bu:
3963 case Intrinsic::loongarch_lsx_vmini_hu:
3964 case Intrinsic::loongarch_lsx_vmini_wu:
3965 case Intrinsic::loongarch_lsx_vmini_du:
3966 case Intrinsic::loongarch_lasx_xvmini_bu:
3967 case Intrinsic::loongarch_lasx_xvmini_hu:
3968 case Intrinsic::loongarch_lasx_xvmini_wu:
3969 case Intrinsic::loongarch_lasx_xvmini_du:
3970 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3971 lowerVectorSplatImm<5>(N, 2, DAG));
3972 case Intrinsic::loongarch_lsx_vmul_b:
3973 case Intrinsic::loongarch_lsx_vmul_h:
3974 case Intrinsic::loongarch_lsx_vmul_w:
3975 case Intrinsic::loongarch_lsx_vmul_d:
3976 case Intrinsic::loongarch_lasx_xvmul_b:
3977 case Intrinsic::loongarch_lasx_xvmul_h:
3978 case Intrinsic::loongarch_lasx_xvmul_w:
3979 case Intrinsic::loongarch_lasx_xvmul_d:
3980 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
3981 N->getOperand(2));
3982 case Intrinsic::loongarch_lsx_vmadd_b:
3983 case Intrinsic::loongarch_lsx_vmadd_h:
3984 case Intrinsic::loongarch_lsx_vmadd_w:
3985 case Intrinsic::loongarch_lsx_vmadd_d:
3986 case Intrinsic::loongarch_lasx_xvmadd_b:
3987 case Intrinsic::loongarch_lasx_xvmadd_h:
3988 case Intrinsic::loongarch_lasx_xvmadd_w:
3989 case Intrinsic::loongarch_lasx_xvmadd_d: {
3990 EVT ResTy = N->getValueType(0);
3991 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
3992 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3993 N->getOperand(3)));
3994 }
3995 case Intrinsic::loongarch_lsx_vmsub_b:
3996 case Intrinsic::loongarch_lsx_vmsub_h:
3997 case Intrinsic::loongarch_lsx_vmsub_w:
3998 case Intrinsic::loongarch_lsx_vmsub_d:
3999 case Intrinsic::loongarch_lasx_xvmsub_b:
4000 case Intrinsic::loongarch_lasx_xvmsub_h:
4001 case Intrinsic::loongarch_lasx_xvmsub_w:
4002 case Intrinsic::loongarch_lasx_xvmsub_d: {
4003 EVT ResTy = N->getValueType(0);
4004 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
4005 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4006 N->getOperand(3)));
4007 }
4008 case Intrinsic::loongarch_lsx_vdiv_b:
4009 case Intrinsic::loongarch_lsx_vdiv_h:
4010 case Intrinsic::loongarch_lsx_vdiv_w:
4011 case Intrinsic::loongarch_lsx_vdiv_d:
4012 case Intrinsic::loongarch_lasx_xvdiv_b:
4013 case Intrinsic::loongarch_lasx_xvdiv_h:
4014 case Intrinsic::loongarch_lasx_xvdiv_w:
4015 case Intrinsic::loongarch_lasx_xvdiv_d:
4016 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
4017 N->getOperand(2));
4018 case Intrinsic::loongarch_lsx_vdiv_bu:
4019 case Intrinsic::loongarch_lsx_vdiv_hu:
4020 case Intrinsic::loongarch_lsx_vdiv_wu:
4021 case Intrinsic::loongarch_lsx_vdiv_du:
4022 case Intrinsic::loongarch_lasx_xvdiv_bu:
4023 case Intrinsic::loongarch_lasx_xvdiv_hu:
4024 case Intrinsic::loongarch_lasx_xvdiv_wu:
4025 case Intrinsic::loongarch_lasx_xvdiv_du:
4026 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
4027 N->getOperand(2));
4028 case Intrinsic::loongarch_lsx_vmod_b:
4029 case Intrinsic::loongarch_lsx_vmod_h:
4030 case Intrinsic::loongarch_lsx_vmod_w:
4031 case Intrinsic::loongarch_lsx_vmod_d:
4032 case Intrinsic::loongarch_lasx_xvmod_b:
4033 case Intrinsic::loongarch_lasx_xvmod_h:
4034 case Intrinsic::loongarch_lasx_xvmod_w:
4035 case Intrinsic::loongarch_lasx_xvmod_d:
4036 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
4037 N->getOperand(2));
4038 case Intrinsic::loongarch_lsx_vmod_bu:
4039 case Intrinsic::loongarch_lsx_vmod_hu:
4040 case Intrinsic::loongarch_lsx_vmod_wu:
4041 case Intrinsic::loongarch_lsx_vmod_du:
4042 case Intrinsic::loongarch_lasx_xvmod_bu:
4043 case Intrinsic::loongarch_lasx_xvmod_hu:
4044 case Intrinsic::loongarch_lasx_xvmod_wu:
4045 case Intrinsic::loongarch_lasx_xvmod_du:
4046 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
4047 N->getOperand(2));
4048 case Intrinsic::loongarch_lsx_vand_v:
4049 case Intrinsic::loongarch_lasx_xvand_v:
4050 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4051 N->getOperand(2));
4052 case Intrinsic::loongarch_lsx_vor_v:
4053 case Intrinsic::loongarch_lasx_xvor_v:
4054 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4055 N->getOperand(2));
4056 case Intrinsic::loongarch_lsx_vxor_v:
4057 case Intrinsic::loongarch_lasx_xvxor_v:
4058 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4059 N->getOperand(2));
4060 case Intrinsic::loongarch_lsx_vnor_v:
4061 case Intrinsic::loongarch_lasx_xvnor_v: {
4062 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4063 N->getOperand(2));
4064 return DAG.getNOT(DL, Res, Res->getValueType(0));
4065 }
4066 case Intrinsic::loongarch_lsx_vandi_b:
4067 case Intrinsic::loongarch_lasx_xvandi_b:
4068 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4069 lowerVectorSplatImm<8>(N, 2, DAG));
4070 case Intrinsic::loongarch_lsx_vori_b:
4071 case Intrinsic::loongarch_lasx_xvori_b:
4072 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4073 lowerVectorSplatImm<8>(N, 2, DAG));
4074 case Intrinsic::loongarch_lsx_vxori_b:
4075 case Intrinsic::loongarch_lasx_xvxori_b:
4076 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4077 lowerVectorSplatImm<8>(N, 2, DAG));
4078 case Intrinsic::loongarch_lsx_vsll_b:
4079 case Intrinsic::loongarch_lsx_vsll_h:
4080 case Intrinsic::loongarch_lsx_vsll_w:
4081 case Intrinsic::loongarch_lsx_vsll_d:
4082 case Intrinsic::loongarch_lasx_xvsll_b:
4083 case Intrinsic::loongarch_lasx_xvsll_h:
4084 case Intrinsic::loongarch_lasx_xvsll_w:
4085 case Intrinsic::loongarch_lasx_xvsll_d:
4086 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4087 truncateVecElts(N, DAG));
4088 case Intrinsic::loongarch_lsx_vslli_b:
4089 case Intrinsic::loongarch_lasx_xvslli_b:
4090 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4091 lowerVectorSplatImm<3>(N, 2, DAG));
4092 case Intrinsic::loongarch_lsx_vslli_h:
4093 case Intrinsic::loongarch_lasx_xvslli_h:
4094 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4095 lowerVectorSplatImm<4>(N, 2, DAG));
4096 case Intrinsic::loongarch_lsx_vslli_w:
4097 case Intrinsic::loongarch_lasx_xvslli_w:
4098 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4099 lowerVectorSplatImm<5>(N, 2, DAG));
4100 case Intrinsic::loongarch_lsx_vslli_d:
4101 case Intrinsic::loongarch_lasx_xvslli_d:
4102 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4103 lowerVectorSplatImm<6>(N, 2, DAG));
4104 case Intrinsic::loongarch_lsx_vsrl_b:
4105 case Intrinsic::loongarch_lsx_vsrl_h:
4106 case Intrinsic::loongarch_lsx_vsrl_w:
4107 case Intrinsic::loongarch_lsx_vsrl_d:
4108 case Intrinsic::loongarch_lasx_xvsrl_b:
4109 case Intrinsic::loongarch_lasx_xvsrl_h:
4110 case Intrinsic::loongarch_lasx_xvsrl_w:
4111 case Intrinsic::loongarch_lasx_xvsrl_d:
4112 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4113 truncateVecElts(N, DAG));
4114 case Intrinsic::loongarch_lsx_vsrli_b:
4115 case Intrinsic::loongarch_lasx_xvsrli_b:
4116 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4117 lowerVectorSplatImm<3>(N, 2, DAG));
4118 case Intrinsic::loongarch_lsx_vsrli_h:
4119 case Intrinsic::loongarch_lasx_xvsrli_h:
4120 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4121 lowerVectorSplatImm<4>(N, 2, DAG));
4122 case Intrinsic::loongarch_lsx_vsrli_w:
4123 case Intrinsic::loongarch_lasx_xvsrli_w:
4124 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4125 lowerVectorSplatImm<5>(N, 2, DAG));
4126 case Intrinsic::loongarch_lsx_vsrli_d:
4127 case Intrinsic::loongarch_lasx_xvsrli_d:
4128 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4129 lowerVectorSplatImm<6>(N, 2, DAG));
4130 case Intrinsic::loongarch_lsx_vsra_b:
4131 case Intrinsic::loongarch_lsx_vsra_h:
4132 case Intrinsic::loongarch_lsx_vsra_w:
4133 case Intrinsic::loongarch_lsx_vsra_d:
4134 case Intrinsic::loongarch_lasx_xvsra_b:
4135 case Intrinsic::loongarch_lasx_xvsra_h:
4136 case Intrinsic::loongarch_lasx_xvsra_w:
4137 case Intrinsic::loongarch_lasx_xvsra_d:
4138 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4139 truncateVecElts(N, DAG));
4140 case Intrinsic::loongarch_lsx_vsrai_b:
4141 case Intrinsic::loongarch_lasx_xvsrai_b:
4142 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4143 lowerVectorSplatImm<3>(N, 2, DAG));
4144 case Intrinsic::loongarch_lsx_vsrai_h:
4145 case Intrinsic::loongarch_lasx_xvsrai_h:
4146 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4147 lowerVectorSplatImm<4>(N, 2, DAG));
4148 case Intrinsic::loongarch_lsx_vsrai_w:
4149 case Intrinsic::loongarch_lasx_xvsrai_w:
4150 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4151 lowerVectorSplatImm<5>(N, 2, DAG));
4152 case Intrinsic::loongarch_lsx_vsrai_d:
4153 case Intrinsic::loongarch_lasx_xvsrai_d:
4154 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4155 lowerVectorSplatImm<6>(N, 2, DAG));
4156 case Intrinsic::loongarch_lsx_vclz_b:
4157 case Intrinsic::loongarch_lsx_vclz_h:
4158 case Intrinsic::loongarch_lsx_vclz_w:
4159 case Intrinsic::loongarch_lsx_vclz_d:
4160 case Intrinsic::loongarch_lasx_xvclz_b:
4161 case Intrinsic::loongarch_lasx_xvclz_h:
4162 case Intrinsic::loongarch_lasx_xvclz_w:
4163 case Intrinsic::loongarch_lasx_xvclz_d:
4164 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4165 case Intrinsic::loongarch_lsx_vpcnt_b:
4166 case Intrinsic::loongarch_lsx_vpcnt_h:
4167 case Intrinsic::loongarch_lsx_vpcnt_w:
4168 case Intrinsic::loongarch_lsx_vpcnt_d:
4169 case Intrinsic::loongarch_lasx_xvpcnt_b:
4170 case Intrinsic::loongarch_lasx_xvpcnt_h:
4171 case Intrinsic::loongarch_lasx_xvpcnt_w:
4172 case Intrinsic::loongarch_lasx_xvpcnt_d:
4173 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
4174 case Intrinsic::loongarch_lsx_vbitclr_b:
4175 case Intrinsic::loongarch_lsx_vbitclr_h:
4176 case Intrinsic::loongarch_lsx_vbitclr_w:
4177 case Intrinsic::loongarch_lsx_vbitclr_d:
4178 case Intrinsic::loongarch_lasx_xvbitclr_b:
4179 case Intrinsic::loongarch_lasx_xvbitclr_h:
4180 case Intrinsic::loongarch_lasx_xvbitclr_w:
4181 case Intrinsic::loongarch_lasx_xvbitclr_d:
4182 return lowerVectorBitClear(N, DAG);
4183 case Intrinsic::loongarch_lsx_vbitclri_b:
4184 case Intrinsic::loongarch_lasx_xvbitclri_b:
4185 return lowerVectorBitClearImm<3>(N, DAG);
4186 case Intrinsic::loongarch_lsx_vbitclri_h:
4187 case Intrinsic::loongarch_lasx_xvbitclri_h:
4188 return lowerVectorBitClearImm<4>(N, DAG);
4189 case Intrinsic::loongarch_lsx_vbitclri_w:
4190 case Intrinsic::loongarch_lasx_xvbitclri_w:
4191 return lowerVectorBitClearImm<5>(N, DAG);
4192 case Intrinsic::loongarch_lsx_vbitclri_d:
4193 case Intrinsic::loongarch_lasx_xvbitclri_d:
4194 return lowerVectorBitClearImm<6>(N, DAG);
4195 case Intrinsic::loongarch_lsx_vbitset_b:
4196 case Intrinsic::loongarch_lsx_vbitset_h:
4197 case Intrinsic::loongarch_lsx_vbitset_w:
4198 case Intrinsic::loongarch_lsx_vbitset_d:
4199 case Intrinsic::loongarch_lasx_xvbitset_b:
4200 case Intrinsic::loongarch_lasx_xvbitset_h:
4201 case Intrinsic::loongarch_lasx_xvbitset_w:
4202 case Intrinsic::loongarch_lasx_xvbitset_d: {
4203 EVT VecTy = N->getValueType(0);
4204 SDValue One = DAG.getConstant(1, DL, VecTy);
4205 return DAG.getNode(
4206 ISD::OR, DL, VecTy, N->getOperand(1),
4207 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4208 }
4209 case Intrinsic::loongarch_lsx_vbitseti_b:
4210 case Intrinsic::loongarch_lasx_xvbitseti_b:
4211 return lowerVectorBitSetImm<3>(N, DAG);
4212 case Intrinsic::loongarch_lsx_vbitseti_h:
4213 case Intrinsic::loongarch_lasx_xvbitseti_h:
4214 return lowerVectorBitSetImm<4>(N, DAG);
4215 case Intrinsic::loongarch_lsx_vbitseti_w:
4216 case Intrinsic::loongarch_lasx_xvbitseti_w:
4217 return lowerVectorBitSetImm<5>(N, DAG);
4218 case Intrinsic::loongarch_lsx_vbitseti_d:
4219 case Intrinsic::loongarch_lasx_xvbitseti_d:
4220 return lowerVectorBitSetImm<6>(N, DAG);
4221 case Intrinsic::loongarch_lsx_vbitrev_b:
4222 case Intrinsic::loongarch_lsx_vbitrev_h:
4223 case Intrinsic::loongarch_lsx_vbitrev_w:
4224 case Intrinsic::loongarch_lsx_vbitrev_d:
4225 case Intrinsic::loongarch_lasx_xvbitrev_b:
4226 case Intrinsic::loongarch_lasx_xvbitrev_h:
4227 case Intrinsic::loongarch_lasx_xvbitrev_w:
4228 case Intrinsic::loongarch_lasx_xvbitrev_d: {
4229 EVT VecTy = N->getValueType(0);
4230 SDValue One = DAG.getConstant(1, DL, VecTy);
4231 return DAG.getNode(
4232 ISD::XOR, DL, VecTy, N->getOperand(1),
4233 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4234 }
4235 case Intrinsic::loongarch_lsx_vbitrevi_b:
4236 case Intrinsic::loongarch_lasx_xvbitrevi_b:
4237 return lowerVectorBitRevImm<3>(N, DAG);
4238 case Intrinsic::loongarch_lsx_vbitrevi_h:
4239 case Intrinsic::loongarch_lasx_xvbitrevi_h:
4240 return lowerVectorBitRevImm<4>(N, DAG);
4241 case Intrinsic::loongarch_lsx_vbitrevi_w:
4242 case Intrinsic::loongarch_lasx_xvbitrevi_w:
4243 return lowerVectorBitRevImm<5>(N, DAG);
4244 case Intrinsic::loongarch_lsx_vbitrevi_d:
4245 case Intrinsic::loongarch_lasx_xvbitrevi_d:
4246 return lowerVectorBitRevImm<6>(N, DAG);
4247 case Intrinsic::loongarch_lsx_vfadd_s:
4248 case Intrinsic::loongarch_lsx_vfadd_d:
4249 case Intrinsic::loongarch_lasx_xvfadd_s:
4250 case Intrinsic::loongarch_lasx_xvfadd_d:
4251 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4252 N->getOperand(2));
4253 case Intrinsic::loongarch_lsx_vfsub_s:
4254 case Intrinsic::loongarch_lsx_vfsub_d:
4255 case Intrinsic::loongarch_lasx_xvfsub_s:
4256 case Intrinsic::loongarch_lasx_xvfsub_d:
4257 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4258 N->getOperand(2));
4259 case Intrinsic::loongarch_lsx_vfmul_s:
4260 case Intrinsic::loongarch_lsx_vfmul_d:
4261 case Intrinsic::loongarch_lasx_xvfmul_s:
4262 case Intrinsic::loongarch_lasx_xvfmul_d:
4263 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4264 N->getOperand(2));
4265 case Intrinsic::loongarch_lsx_vfdiv_s:
4266 case Intrinsic::loongarch_lsx_vfdiv_d:
4267 case Intrinsic::loongarch_lasx_xvfdiv_s:
4268 case Intrinsic::loongarch_lasx_xvfdiv_d:
4269 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
4270 N->getOperand(2));
4271 case Intrinsic::loongarch_lsx_vfmadd_s:
4272 case Intrinsic::loongarch_lsx_vfmadd_d:
4273 case Intrinsic::loongarch_lasx_xvfmadd_s:
4274 case Intrinsic::loongarch_lasx_xvfmadd_d:
4275 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
4276 N->getOperand(2), N->getOperand(3));
4277 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
4278 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4279 N->getOperand(1), N->getOperand(2),
4280 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
4281 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
4282 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
4283 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4284 N->getOperand(1), N->getOperand(2),
4285 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
4286 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
4287 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
4288 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4289 N->getOperand(1), N->getOperand(2),
4290 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
4291 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
4292 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4293 N->getOperand(1), N->getOperand(2),
4294 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
4295 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
4296 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
4297 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
4298 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
4299 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
4300 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
4301 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
4302 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
4303 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
4304 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4305 N->getOperand(1)));
4306 case Intrinsic::loongarch_lsx_vreplve_b:
4307 case Intrinsic::loongarch_lsx_vreplve_h:
4308 case Intrinsic::loongarch_lsx_vreplve_w:
4309 case Intrinsic::loongarch_lsx_vreplve_d:
4310 case Intrinsic::loongarch_lasx_xvreplve_b:
4311 case Intrinsic::loongarch_lasx_xvreplve_h:
4312 case Intrinsic::loongarch_lasx_xvreplve_w:
4313 case Intrinsic::loongarch_lasx_xvreplve_d:
4314 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
4315 N->getOperand(1),
4316 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4317 N->getOperand(2)));
4318 }
4319 return SDValue();
4320}
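// Editor's note (illustrative): this combine rewrites simple LSX/LASX
// intrinsics into generic DAG nodes so that ordinary DAG optimisations apply.
// For example, a call to @llvm.loongarch.lsx.vadd.w becomes a plain ISD::ADD
// on v4i32, and @llvm.loongarch.lsx.vaddi.wu becomes an ISD::ADD whose 5-bit
// immediate has been splatted into a constant vector by
// lowerVectorSplatImm<5>.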
4321
4322SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
4323 DAGCombinerInfo &DCI) const {
4324 SelectionDAG &DAG = DCI.DAG;
4325 switch (N->getOpcode()) {
4326 default:
4327 break;
4328 case ISD::AND:
4329 return performANDCombine(N, DAG, DCI, Subtarget);
4330 case ISD::OR:
4331 return performORCombine(N, DAG, DCI, Subtarget);
4332 case ISD::SETCC:
4333 return performSETCCCombine(N, DAG, DCI, Subtarget);
4334 case ISD::SRL:
4335 return performSRLCombine(N, DAG, DCI, Subtarget);
4336 case LoongArchISD::BITREV_W:
4337 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
4338 case ISD::INTRINSIC_WO_CHAIN:
4339 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
4340 }
4341 return SDValue();
4342}
4343
4344static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
4345 MachineBasicBlock *MBB) {
4346 if (!ZeroDivCheck)
4347 return MBB;
4348
4349 // Build instructions:
4350 // MBB:
4351 // div(or mod) $dst, $dividend, $divisor
4352 // bnez $divisor, SinkMBB
4353 // BreakMBB:
4354 // break 7 // BRK_DIVZERO
4355 // SinkMBB:
4356 // fallthrough
4357 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
4358 MachineFunction::iterator It = ++MBB->getIterator();
4359 MachineFunction *MF = MBB->getParent();
4360 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4361 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4362 MF->insert(It, BreakMBB);
4363 MF->insert(It, SinkMBB);
4364
4365 // Transfer the remainder of MBB and its successor edges to SinkMBB.
4366 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
4367 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
4368
4369 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
4370 DebugLoc DL = MI.getDebugLoc();
4371 MachineOperand &Divisor = MI.getOperand(2);
4372 Register DivisorReg = Divisor.getReg();
4373
4374 // MBB:
4375 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
4376 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
4377 .addMBB(SinkMBB);
4378 MBB->addSuccessor(BreakMBB);
4379 MBB->addSuccessor(SinkMBB);
4380
4381 // BreakMBB:
4382 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
4383 // definition of BRK_DIVZERO.
4384 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
4385 BreakMBB->addSuccessor(SinkMBB);
4386
4387 // Clear Divisor's kill flag.
4388 Divisor.setIsKill(false);
4389
4390 return SinkMBB;
4391}
4392
4393static MachineBasicBlock *
4394emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
4395 const LoongArchSubtarget &Subtarget) {
4396 unsigned CondOpc;
4397 switch (MI.getOpcode()) {
4398 default:
4399 llvm_unreachable("Unexpected opcode");
4400 case LoongArch::PseudoVBZ:
4401 CondOpc = LoongArch::VSETEQZ_V;
4402 break;
4403 case LoongArch::PseudoVBZ_B:
4404 CondOpc = LoongArch::VSETANYEQZ_B;
4405 break;
4406 case LoongArch::PseudoVBZ_H:
4407 CondOpc = LoongArch::VSETANYEQZ_H;
4408 break;
4409 case LoongArch::PseudoVBZ_W:
4410 CondOpc = LoongArch::VSETANYEQZ_W;
4411 break;
4412 case LoongArch::PseudoVBZ_D:
4413 CondOpc = LoongArch::VSETANYEQZ_D;
4414 break;
4415 case LoongArch::PseudoVBNZ:
4416 CondOpc = LoongArch::VSETNEZ_V;
4417 break;
4418 case LoongArch::PseudoVBNZ_B:
4419 CondOpc = LoongArch::VSETALLNEZ_B;
4420 break;
4421 case LoongArch::PseudoVBNZ_H:
4422 CondOpc = LoongArch::VSETALLNEZ_H;
4423 break;
4424 case LoongArch::PseudoVBNZ_W:
4425 CondOpc = LoongArch::VSETALLNEZ_W;
4426 break;
4427 case LoongArch::PseudoVBNZ_D:
4428 CondOpc = LoongArch::VSETALLNEZ_D;
4429 break;
4430 case LoongArch::PseudoXVBZ:
4431 CondOpc = LoongArch::XVSETEQZ_V;
4432 break;
4433 case LoongArch::PseudoXVBZ_B:
4434 CondOpc = LoongArch::XVSETANYEQZ_B;
4435 break;
4436 case LoongArch::PseudoXVBZ_H:
4437 CondOpc = LoongArch::XVSETANYEQZ_H;
4438 break;
4439 case LoongArch::PseudoXVBZ_W:
4440 CondOpc = LoongArch::XVSETANYEQZ_W;
4441 break;
4442 case LoongArch::PseudoXVBZ_D:
4443 CondOpc = LoongArch::XVSETANYEQZ_D;
4444 break;
4445 case LoongArch::PseudoXVBNZ:
4446 CondOpc = LoongArch::XVSETNEZ_V;
4447 break;
4448 case LoongArch::PseudoXVBNZ_B:
4449 CondOpc = LoongArch::XVSETALLNEZ_B;
4450 break;
4451 case LoongArch::PseudoXVBNZ_H:
4452 CondOpc = LoongArch::XVSETALLNEZ_H;
4453 break;
4454 case LoongArch::PseudoXVBNZ_W:
4455 CondOpc = LoongArch::XVSETALLNEZ_W;
4456 break;
4457 case LoongArch::PseudoXVBNZ_D:
4458 CondOpc = LoongArch::XVSETALLNEZ_D;
4459 break;
4460 }
4461
4462 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4463 const BasicBlock *LLVM_BB = BB->getBasicBlock();
4464 DebugLoc DL = MI.getDebugLoc();
4465 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4466 MachineFunction::iterator It = ++BB->getIterator();
4467
4468 MachineFunction *F = BB->getParent();
4469 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
4470 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
4471 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
4472
4473 F->insert(It, FalseBB);
4474 F->insert(It, TrueBB);
4475 F->insert(It, SinkBB);
4476
4477 // Transfer the remainder of BB and its successor edges to SinkBB.
4478 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
4479 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
4480
4481 // Insert the real instruction into BB.
4482 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
4483 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
4484
4485 // Insert branch.
4486 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
4487 BB->addSuccessor(FalseBB);
4488 BB->addSuccessor(TrueBB);
4489
4490 // FalseBB.
4491 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4492 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
4493 .addReg(LoongArch::R0)
4494 .addImm(0);
4495 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
4496 FalseBB->addSuccessor(SinkBB);
4497
4498 // TrueBB.
4499 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4500 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
4501 .addReg(LoongArch::R0)
4502 .addImm(1);
4503 TrueBB->addSuccessor(SinkBB);
4504
4505 // SinkBB: merge the results.
4506 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
4507 MI.getOperand(0).getReg())
4508 .addReg(RD1)
4509 .addMBB(FalseBB)
4510 .addReg(RD2)
4511 .addMBB(TrueBB);
4512
4513 // The pseudo instruction is gone now.
4514 MI.eraseFromParent();
4515 return SinkBB;
4516}
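// Editor's sketch of the expansion above for a PseudoVBZ_W (schematic, not
// literal MI output):
//   FCC      = VSETANYEQZ_W vr        ; CondOpc selected in the switch
//   BCNEZ FCC, TrueBB
//   FalseBB: rd1 = ADDI_W $r0, 0      ; then branch to SinkBB
//   TrueBB:  rd2 = ADDI_W $r0, 1
//   SinkBB:  result = PHI(rd1 from FalseBB, rd2 from TrueBB)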
4517
4518static MachineBasicBlock *
4519emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
4520 const LoongArchSubtarget &Subtarget) {
4521 unsigned InsOp;
4522 unsigned HalfSize;
4523 switch (MI.getOpcode()) {
4524 default:
4525 llvm_unreachable("Unexpected opcode");
4526 case LoongArch::PseudoXVINSGR2VR_B:
4527 HalfSize = 16;
4528 InsOp = LoongArch::VINSGR2VR_B;
4529 break;
4530 case LoongArch::PseudoXVINSGR2VR_H:
4531 HalfSize = 8;
4532 InsOp = LoongArch::VINSGR2VR_H;
4533 break;
4534 }
4535 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4536 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
4537 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
4538 DebugLoc DL = MI.getDebugLoc();
4539 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4540 // XDst = vector_insert XSrc, Elt, Idx
4541 Register XDst = MI.getOperand(0).getReg();
4542 Register XSrc = MI.getOperand(1).getReg();
4543 Register Elt = MI.getOperand(2).getReg();
4544 unsigned Idx = MI.getOperand(3).getImm();
4545
4546 Register ScratchReg1 = XSrc;
4547 if (Idx >= HalfSize) {
4548 ScratchReg1 = MRI.createVirtualRegister(RC);
4549 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
4550 .addReg(XSrc)
4551 .addReg(XSrc)
4552 .addImm(1);
4553 }
4554
4555 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
4556 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
4557 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
4558 .addReg(ScratchReg1, 0, LoongArch::sub_128);
4559 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
4560 .addReg(ScratchSubReg1)
4561 .addReg(Elt)
4562 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
4563
4564 Register ScratchReg2 = XDst;
4565 if (Idx >= HalfSize)
4566 ScratchReg2 = MRI.createVirtualRegister(RC);
4567
4568 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
4569 .addImm(0)
4570 .addReg(ScratchSubReg2)
4571 .addImm(LoongArch::sub_128);
4572
4573 if (Idx >= HalfSize)
4574 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
4575 .addReg(XSrc)
4576 .addReg(ScratchReg2)
4577 .addImm(2);
4578
4579 MI.eraseFromParent();
4580 return BB;
4581}
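// Editor's walk-through (illustrative indices): for a PseudoXVINSGR2VR_B with
// Idx = 20 (HalfSize = 16), XVPERMI_Q first copies the high 128-bit half of
// XSrc into a scratch register, VINSGR2VR_B inserts Elt at lane 20 - 16 = 4
// of that subvector, and a final XVPERMI_Q writes the updated subvector back
// into the high half of XDst. For Idx < 16 the low half is used directly and
// the result is rebuilt with SUBREG_TO_REG alone.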
4582
4583static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
4584 MachineBasicBlock *BB,
4585 const LoongArchSubtarget &Subtarget) {
4586 assert(Subtarget.hasExtLSX());
4587 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4588 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
4589 DebugLoc DL = MI.getDebugLoc();
4590 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4591 Register Dst = MI.getOperand(0).getReg();
4592 Register Src = MI.getOperand(1).getReg();
4593 Register ScratchReg1 = MRI.createVirtualRegister(RC);
4594 Register ScratchReg2 = MRI.createVirtualRegister(RC);
4595 Register ScratchReg3 = MRI.createVirtualRegister(RC);
4596
4597 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
4598 BuildMI(*BB, MI, DL,
4599 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
4600 : LoongArch::VINSGR2VR_W),
4601 ScratchReg2)
4602 .addReg(ScratchReg1)
4603 .addReg(Src)
4604 .addImm(0);
4605 BuildMI(
4606 *BB, MI, DL,
4607 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
4608 ScratchReg3)
4609 .addReg(ScratchReg2);
4610 BuildMI(*BB, MI, DL,
4611 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
4612 : LoongArch::VPICKVE2GR_W),
4613 Dst)
4614 .addReg(ScratchReg3)
4615 .addImm(0);
4616
4617 MI.eraseFromParent();
4618 return BB;
4619}
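// Editor's sketch of the sequence built above on LA64 (schematic only):
//   vr_a = VLDI 0                     ; zeroed scratch vector
//   vr_b = VINSGR2VR_D vr_a, src, 0   ; move the GPR into element 0
//   vr_c = VPCNT_D vr_b               ; vector popcount
//   dst  = VPICKVE2GR_D vr_c, 0       ; move the count back to a GPR
// On LA32 the _W forms of the insert/popcount/pick instructions are used.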
4620
4621MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
4622 MachineInstr &MI, MachineBasicBlock *BB) const {
4623 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4624 DebugLoc DL = MI.getDebugLoc();
4625
4626 switch (MI.getOpcode()) {
4627 default:
4628 llvm_unreachable("Unexpected instr type to insert");
4629 case LoongArch::DIV_W:
4630 case LoongArch::DIV_WU:
4631 case LoongArch::MOD_W:
4632 case LoongArch::MOD_WU:
4633 case LoongArch::DIV_D:
4634 case LoongArch::DIV_DU:
4635 case LoongArch::MOD_D:
4636 case LoongArch::MOD_DU:
4637 return insertDivByZeroTrap(MI, BB);
4638 break;
4639 case LoongArch::WRFCSR: {
4640 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
4641 LoongArch::FCSR0 + MI.getOperand(0).getImm())
4642 .addReg(MI.getOperand(1).getReg());
4643 MI.eraseFromParent();
4644 return BB;
4645 }
4646 case LoongArch::RDFCSR: {
4647 MachineInstr *ReadFCSR =
4648 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
4649 MI.getOperand(0).getReg())
4650 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
4651 ReadFCSR->getOperand(1).setIsUndef();
4652 MI.eraseFromParent();
4653 return BB;
4654 }
4655 case LoongArch::PseudoVBZ:
4656 case LoongArch::PseudoVBZ_B:
4657 case LoongArch::PseudoVBZ_H:
4658 case LoongArch::PseudoVBZ_W:
4659 case LoongArch::PseudoVBZ_D:
4660 case LoongArch::PseudoVBNZ:
4661 case LoongArch::PseudoVBNZ_B:
4662 case LoongArch::PseudoVBNZ_H:
4663 case LoongArch::PseudoVBNZ_W:
4664 case LoongArch::PseudoVBNZ_D:
4665 case LoongArch::PseudoXVBZ:
4666 case LoongArch::PseudoXVBZ_B:
4667 case LoongArch::PseudoXVBZ_H:
4668 case LoongArch::PseudoXVBZ_W:
4669 case LoongArch::PseudoXVBZ_D:
4670 case LoongArch::PseudoXVBNZ:
4671 case LoongArch::PseudoXVBNZ_B:
4672 case LoongArch::PseudoXVBNZ_H:
4673 case LoongArch::PseudoXVBNZ_W:
4674 case LoongArch::PseudoXVBNZ_D:
4675 return emitVecCondBranchPseudo(MI, BB, Subtarget);
4676 case LoongArch::PseudoXVINSGR2VR_B:
4677 case LoongArch::PseudoXVINSGR2VR_H:
4678 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
4679 case LoongArch::PseudoCTPOP:
4680 return emitPseudoCTPOP(MI, BB, Subtarget);
4681 case TargetOpcode::STATEPOINT:
4682 // STATEPOINT is a pseudo instruction that has no implicit defs/uses,
4683 // while the bl call instruction (to which the statepoint is lowered in the
4684 // end) has an implicit def. That def is early-clobber because it is set at
4685 // the moment of the call, earlier than any use is read.
4686 // Add the implicit dead def here as a workaround.
4687 MI.addOperand(*MI.getMF(),
4688 MachineOperand::CreateReg(
4689 LoongArch::R1, /*isDef*/ true,
4690 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
4691 /*isUndef*/ false, /*isEarlyClobber*/ true));
4692 if (!Subtarget.is64Bit())
4693 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
4694 return emitPatchPoint(MI, BB);
4695 }
4696}
4697
4698bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
4699 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
4700 unsigned *Fast) const {
4701 if (!Subtarget.hasUAL())
4702 return false;
4703
4704 // TODO: set reasonable speed number.
4705 if (Fast)
4706 *Fast = 1;
4707 return true;
4708}
4709
4710const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
4711 switch ((LoongArchISD::NodeType)Opcode) {
4712 case LoongArchISD::FIRST_NUMBER:
4713 break;
4714
4715#define NODE_NAME_CASE(node) \
4716 case LoongArchISD::node: \
4717 return "LoongArchISD::" #node;
4718
4719 // TODO: Add more target-dependent nodes later.
4720 NODE_NAME_CASE(CALL)
4721 NODE_NAME_CASE(CALL_MEDIUM)
4722 NODE_NAME_CASE(CALL_LARGE)
4723 NODE_NAME_CASE(RET)
4724 NODE_NAME_CASE(TAIL)
4725 NODE_NAME_CASE(TAIL_MEDIUM)
4726 NODE_NAME_CASE(TAIL_LARGE)
4727 NODE_NAME_CASE(SLL_W)
4728 NODE_NAME_CASE(SRA_W)
4729 NODE_NAME_CASE(SRL_W)
4730 NODE_NAME_CASE(BSTRINS)
4731 NODE_NAME_CASE(BSTRPICK)
4732 NODE_NAME_CASE(MOVGR2FR_W_LA64)
4733 NODE_NAME_CASE(MOVFR2GR_S_LA64)
4734 NODE_NAME_CASE(FTINT)
4735 NODE_NAME_CASE(REVB_2H)
4736 NODE_NAME_CASE(REVB_2W)
4737 NODE_NAME_CASE(BITREV_4B)
4738 NODE_NAME_CASE(BITREV_8B)
4739 NODE_NAME_CASE(BITREV_W)
4740 NODE_NAME_CASE(ROTR_W)
4741 NODE_NAME_CASE(ROTL_W)
4742 NODE_NAME_CASE(DIV_W)
4743 NODE_NAME_CASE(DIV_WU)
4744 NODE_NAME_CASE(MOD_W)
4745 NODE_NAME_CASE(MOD_WU)
4746 NODE_NAME_CASE(CLZ_W)
4747 NODE_NAME_CASE(CTZ_W)
4748 NODE_NAME_CASE(DBAR)
4749 NODE_NAME_CASE(IBAR)
4750 NODE_NAME_CASE(BREAK)
4751 NODE_NAME_CASE(SYSCALL)
4752 NODE_NAME_CASE(CRC_W_B_W)
4753 NODE_NAME_CASE(CRC_W_H_W)
4754 NODE_NAME_CASE(CRC_W_W_W)
4755 NODE_NAME_CASE(CRC_W_D_W)
4756 NODE_NAME_CASE(CRCC_W_B_W)
4757 NODE_NAME_CASE(CRCC_W_H_W)
4758 NODE_NAME_CASE(CRCC_W_W_W)
4759 NODE_NAME_CASE(CRCC_W_D_W)
4760 NODE_NAME_CASE(CSRRD)
4761 NODE_NAME_CASE(CSRWR)
4762 NODE_NAME_CASE(CSRXCHG)
4763 NODE_NAME_CASE(IOCSRRD_B)
4764 NODE_NAME_CASE(IOCSRRD_H)
4765 NODE_NAME_CASE(IOCSRRD_W)
4766 NODE_NAME_CASE(IOCSRRD_D)
4767 NODE_NAME_CASE(IOCSRWR_B)
4768 NODE_NAME_CASE(IOCSRWR_H)
4769 NODE_NAME_CASE(IOCSRWR_W)
4770 NODE_NAME_CASE(IOCSRWR_D)
4771 NODE_NAME_CASE(CPUCFG)
4772 NODE_NAME_CASE(MOVGR2FCSR)
4773 NODE_NAME_CASE(MOVFCSR2GR)
4774 NODE_NAME_CASE(CACOP_D)
4775 NODE_NAME_CASE(CACOP_W)
4776 NODE_NAME_CASE(VSHUF)
4777 NODE_NAME_CASE(VPICKEV)
4778 NODE_NAME_CASE(VPICKOD)
4779 NODE_NAME_CASE(VPACKEV)
4780 NODE_NAME_CASE(VPACKOD)
4781 NODE_NAME_CASE(VILVL)
4782 NODE_NAME_CASE(VILVH)
4783 NODE_NAME_CASE(VSHUF4I)
4784 NODE_NAME_CASE(VREPLVEI)
4785 NODE_NAME_CASE(VREPLGR2VR)
4786 NODE_NAME_CASE(XVPERMI)
4787 NODE_NAME_CASE(VPICK_SEXT_ELT)
4788 NODE_NAME_CASE(VPICK_ZEXT_ELT)
4789 NODE_NAME_CASE(VREPLVE)
4790 NODE_NAME_CASE(VALL_ZERO)
4791 NODE_NAME_CASE(VANY_ZERO)
4792 NODE_NAME_CASE(VALL_NONZERO)
4793 NODE_NAME_CASE(VANY_NONZERO)
4794 NODE_NAME_CASE(FRECIPE)
4795 NODE_NAME_CASE(FRSQRTE)
4796 }
4797#undef NODE_NAME_CASE
4798 return nullptr;
4799}
4800
4801//===----------------------------------------------------------------------===//
4802// Calling Convention Implementation
4803//===----------------------------------------------------------------------===//
4804
4805 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
4806 // with a0-a1 reused to return values. Generally, the GPRs are used to pass
4807 // fixed-point arguments, and floating-point arguments when no FPR is available
4808 // or with the soft-float ABI.
4809const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
4810 LoongArch::R7, LoongArch::R8, LoongArch::R9,
4811 LoongArch::R10, LoongArch::R11};
4812 // Eight floating-point registers fa0-fa7 are used for passing floating-point
4813// arguments, and fa0-fa1 are also used to return values.
4814const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
4815 LoongArch::F3, LoongArch::F4, LoongArch::F5,
4816 LoongArch::F6, LoongArch::F7};
4817// FPR32 and FPR64 alias each other.
4818const MCPhysReg ArgFPR64s[] = {
4819 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
4820 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
4821
4822const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
4823 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
4824 LoongArch::VR6, LoongArch::VR7};
4825
4826const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
4827 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
4828 LoongArch::XR6, LoongArch::XR7};
4829
4830// Pass a 2*GRLen argument that has been split into two GRLen values through
4831// registers or the stack as necessary.
4832static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
4833 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
4834 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
4835 ISD::ArgFlagsTy ArgFlags2) {
4836 unsigned GRLenInBytes = GRLen / 8;
4837 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4838 // At least one half can be passed via register.
4839 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4840 VA1.getLocVT(), CCValAssign::Full));
4841 } else {
4842 // Both halves must be passed on the stack, with proper alignment.
4843 Align StackAlign =
4844 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4845 State.addLoc(
4846 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4847 State.AllocateStack(GRLenInBytes, StackAlign),
4848 VA1.getLocVT(), CCValAssign::Full));
4849 State.addLoc(CCValAssign::getMem(
4850 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4851 LocVT2, CCValAssign::Full));
4852 return false;
4853 }
4854 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4855 // The second half can also be passed via register.
4856 State.addLoc(
4857 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4858 } else {
4859 // The second half is passed via the stack, without additional alignment.
4860 State.addLoc(CCValAssign::getMem(
4861 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4862 LocVT2, CCValAssign::Full));
4863 }
4864 return false;
4865}
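// Editor's example (illustrative): on LA32 (GRLen = 32) an i64 argument is
// split into two i32 halves. If a GPR from a0-a7 is still free, the first
// half goes in that register and the second half takes the next free GPR or
// a 4-byte stack slot; if no GPR is free, both halves go on the stack, the
// first slot aligned to max(4, the argument's original alignment).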
4866
4867// Implements the LoongArch calling convention. Returns true upon failure.
4868static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
4869 unsigned ValNo, MVT ValVT,
4870 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
4871 CCState &State, bool IsFixed, bool IsRet,
4872 Type *OrigTy) {
4873 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
4874 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
4875 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
4876 MVT LocVT = ValVT;
4877
4878 // Any return value split into more than two values can't be returned
4879 // directly.
4880 if (IsRet && ValNo > 1)
4881 return true;
4882
4883 // Use a GPR for a floating-point value if passing a variadic argument or if no FPR is available.
4884 bool UseGPRForFloat = true;
4885
4886 switch (ABI) {
4887 default:
4888 llvm_unreachable("Unexpected ABI");
4889 break;
4890 case LoongArchABI::ABI_ILP32F:
4891 case LoongArchABI::ABI_LP64F:
4892 case LoongArchABI::ABI_ILP32D:
4893 case LoongArchABI::ABI_LP64D:
4894 UseGPRForFloat = !IsFixed;
4895 break;
4896 case LoongArchABI::ABI_ILP32S:
4897 case LoongArchABI::ABI_LP64S:
4898 break;
4899 }
4900
4901 // FPR32 and FPR64 alias each other.
4902 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
4903 UseGPRForFloat = true;
4904
4905 if (UseGPRForFloat && ValVT == MVT::f32) {
4906 LocVT = GRLenVT;
4907 LocInfo = CCValAssign::BCvt;
4908 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
4909 LocVT = MVT::i64;
4910 LocInfo = CCValAssign::BCvt;
4911 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
4912 // TODO: Handle passing f64 on LA32 with D feature.
4913 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
4914 }
4915
4916 // If this is a variadic argument, the LoongArch calling convention requires
4917 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
4918 // byte alignment. An aligned register should be used regardless of whether
4919 // the original argument was split during legalisation or not. The argument
4920 // will not be passed by registers if the original type is larger than
4921 // 2*GRLen, so the register alignment rule does not apply.
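  // For example, a variadic __int128 on LA64 (16-byte size and 16-byte
  // alignment) must start in an even-numbered GPR such as $a2 or $a4; if the
  // next free register would be odd-numbered, it is skipped below.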
4922 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
4923 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
4924 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
4925 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
4926 // Skip 'odd' register if necessary.
4927 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
4928 State.AllocateReg(ArgGPRs);
4929 }
4930
4931 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
4932 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
4933 State.getPendingArgFlags();
4934
4935 assert(PendingLocs.size() == PendingArgFlags.size() &&
4936 "PendingLocs and PendingArgFlags out of sync");
4937
4938 // Split arguments might be passed indirectly, so keep track of the pending
4939 // values.
4940 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
4941 LocVT = GRLenVT;
4942 LocInfo = CCValAssign::Indirect;
4943 PendingLocs.push_back(
4944 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
4945 PendingArgFlags.push_back(ArgFlags);
4946 if (!ArgFlags.isSplitEnd()) {
4947 return false;
4948 }
4949 }
4950
4951 // If the split argument only had two elements, it should be passed directly
4952 // in registers or on the stack.
4953 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
4954 PendingLocs.size() <= 2) {
4955 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
4956 // Apply the normal calling convention rules to the first half of the
4957 // split argument.
4958 CCValAssign VA = PendingLocs[0];
4959 ISD::ArgFlagsTy AF = PendingArgFlags[0];
4960 PendingLocs.clear();
4961 PendingArgFlags.clear();
4962 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
4963 ArgFlags);
4964 }
4965
4966 // Allocate to a register if possible, or else a stack slot.
4967 Register Reg;
4968 unsigned StoreSizeBytes = GRLen / 8;
4969 Align StackAlign = Align(GRLen / 8);
4970
4971 if (ValVT == MVT::f32 && !UseGPRForFloat)
4972 Reg = State.AllocateReg(ArgFPR32s);
4973 else if (ValVT == MVT::f64 && !UseGPRForFloat)
4974 Reg = State.AllocateReg(ArgFPR64s);
4975 else if (ValVT.is128BitVector())
4976 Reg = State.AllocateReg(ArgVRs);
4977 else if (ValVT.is256BitVector())
4978 Reg = State.AllocateReg(ArgXRs);
4979 else
4980 Reg = State.AllocateReg(ArgGPRs);
4981
4982 unsigned StackOffset =
4983 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
4984
4985 // If we reach this point and PendingLocs is non-empty, we must be at the
4986 // end of a split argument that must be passed indirectly.
4987 if (!PendingLocs.empty()) {
4988 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
4989 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
4990 for (auto &It : PendingLocs) {
4991 if (Reg)
4992 It.convertToReg(Reg);
4993 else
4994 It.convertToMem(StackOffset);
4995 State.addLoc(It);
4996 }
4997 PendingLocs.clear();
4998 PendingArgFlags.clear();
4999 return false;
5000 }
5001 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
5002 "Expected an GRLenVT at this stage");
5003
5004 if (Reg) {
5005 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5006 return false;
5007 }
5008
5009 // When a floating-point value is passed on the stack, no bit-cast is needed.
5010 if (ValVT.isFloatingPoint()) {
5011 LocVT = ValVT;
5012 LocInfo = CCValAssign::Full;
5013 }
5014
5015 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
5016 return false;
5017}
5018
5019void LoongArchTargetLowering::analyzeInputArgs(
5020 MachineFunction &MF, CCState &CCInfo,
5021 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
5022 LoongArchCCAssignFn Fn) const {
5023 FunctionType *FType = MF.getFunction().getFunctionType();
5024 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5025 MVT ArgVT = Ins[i].VT;
5026 Type *ArgTy = nullptr;
5027 if (IsRet)
5028 ArgTy = FType->getReturnType();
5029 else if (Ins[i].isOrigArg())
5030 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
5031 LoongArchABI::ABI ABI =
5032 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5033 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
5034 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
5035 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
5036 << '\n');
5037 llvm_unreachable("");
5038 }
5039 }
5040}
5041
5042void LoongArchTargetLowering::analyzeOutputArgs(
5043 MachineFunction &MF, CCState &CCInfo,
5044 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
5045 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
5046 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5047 MVT ArgVT = Outs[i].VT;
5048 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
5049 LoongArchABI::ABI ABI =
5050 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5051 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
5052 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
5053 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
5054 << "\n");
5055 llvm_unreachable("");
5056 }
5057 }
5058}
5059
5060// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
5061// values.
5062static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
5063 const CCValAssign &VA, const SDLoc &DL) {
5064 switch (VA.getLocInfo()) {
5065 default:
5066 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5067 case CCValAssign::Full:
5069 break;
5070 case CCValAssign::BCvt:
5071 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5072 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
5073 else
5074 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5075 break;
5076 }
5077 return Val;
5078}
5079
5080static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
5081 const CCValAssign &VA, const SDLoc &DL,
5082 const ISD::InputArg &In,
5083 const LoongArchTargetLowering &TLI) {
5084 MachineFunction &MF = DAG.getMachineFunction();
5085 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5086 EVT LocVT = VA.getLocVT();
5087 SDValue Val;
5088 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
5089 Register VReg = RegInfo.createVirtualRegister(RC);
5090 RegInfo.addLiveIn(VA.getLocReg(), VReg);
5091 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
5092
5093 // If input is sign extended from 32 bits, note it for the OptW pass.
5094 if (In.isOrigArg()) {
5095 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
5096 if (OrigArg->getType()->isIntegerTy()) {
5097 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
5098 // An input zero extended from i31 can also be considered sign extended.
5099 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
5100 (BitWidth < 32 && In.Flags.isZExt())) {
5101 LoongArchMachineFunctionInfo *LAFI =
5102 MF.getInfo<LoongArchMachineFunctionInfo>();
5103 LAFI->addSExt32Register(VReg);
5104 }
5105 }
5106 }
5107
5108 return convertLocVTToValVT(DAG, Val, VA, DL);
5109}
5110
5111// The caller is responsible for loading the full value if the argument is
5112// passed with CCValAssign::Indirect.
5113static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
5114 const CCValAssign &VA, const SDLoc &DL) {
5115 MachineFunction &MF = DAG.getMachineFunction();
5116 MachineFrameInfo &MFI = MF.getFrameInfo();
5117 EVT ValVT = VA.getValVT();
5118 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
5119 /*IsImmutable=*/true);
5120 SDValue FIN = DAG.getFrameIndex(
5122
5123 ISD::LoadExtType ExtType;
5124 switch (VA.getLocInfo()) {
5125 default:
5126 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5127 case CCValAssign::Full:
5128 case CCValAssign::Indirect:
5129 case CCValAssign::BCvt:
5130 ExtType = ISD::NON_EXTLOAD;
5131 break;
5132 }
5133 return DAG.getExtLoad(
5134 ExtType, DL, VA.getLocVT(), Chain, FIN,
5136}
5137
5138static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
5139 const CCValAssign &VA, const SDLoc &DL) {
5140 EVT LocVT = VA.getLocVT();
5141
5142 switch (VA.getLocInfo()) {
5143 default:
5144 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5145 case CCValAssign::Full:
5146 break;
5147 case CCValAssign::BCvt:
5148 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5149 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
5150 else
5151 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
5152 break;
5153 }
5154 return Val;
5155}
5156
5157static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5158 CCValAssign::LocInfo LocInfo,
5159 ISD::ArgFlagsTy ArgFlags, CCState &State) {
5160 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5161 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
5162 // s0 s1 s2 s3 s4 s5 s6 s7 s8
5163 static const MCPhysReg GPRList[] = {
5164 LoongArch::R23, LoongArch::R24, LoongArch::R25,
5165 LoongArch::R26, LoongArch::R27, LoongArch::R28,
5166 LoongArch::R29, LoongArch::R30, LoongArch::R31};
5167 if (MCRegister Reg = State.AllocateReg(GPRList)) {
5168 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5169 return false;
5170 }
5171 }
5172
5173 if (LocVT == MVT::f32) {
5174 // Pass in STG registers: F1, F2, F3, F4
5175 // fs0,fs1,fs2,fs3
5176 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
5177 LoongArch::F26, LoongArch::F27};
5178 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
5179 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5180 return false;
5181 }
5182 }
5183
5184 if (LocVT == MVT::f64) {
5185 // Pass in STG registers: D1, D2, D3, D4
5186 // fs4,fs5,fs6,fs7
5187 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
5188 LoongArch::F30_64, LoongArch::F31_64};
5189 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
5190 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5191 return false;
5192 }
5193 }
5194
5195 report_fatal_error("No registers left in GHC calling convention");
5196 return true;
5197}
5198
5199// Transform physical registers into virtual registers.
5200SDValue LoongArchTargetLowering::LowerFormalArguments(
5201 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5202 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5203 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5204
5205 MachineFunction &MF = DAG.getMachineFunction();
5206
5207 switch (CallConv) {
5208 default:
5209 llvm_unreachable("Unsupported calling convention");
5210 case CallingConv::C:
5211 case CallingConv::Fast:
5212 break;
5213 case CallingConv::GHC:
5214 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
5215 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
5217 "GHC calling convention requires the F and D extensions");
5218 }
5219
5220 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5221 MVT GRLenVT = Subtarget.getGRLenVT();
5222 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
5223 // Used with varargs to accumulate store chains.
5224 std::vector<SDValue> OutChains;
5225
5226 // Assign locations to all of the incoming arguments.
5227 SmallVector<CCValAssign> ArgLocs;
5228 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5229
5230 if (CallConv == CallingConv::GHC)
5231 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
5232 else
5233 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
5234
5235 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5236 CCValAssign &VA = ArgLocs[i];
5237 SDValue ArgValue;
5238 if (VA.isRegLoc())
5239 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
5240 else
5241 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5242 if (VA.getLocInfo() == CCValAssign::Indirect) {
5243 // If the original argument was split and passed by reference, we need to
5244 // load all parts of it here (using the same address).
5245 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5247 unsigned ArgIndex = Ins[i].OrigArgIndex;
5248 unsigned ArgPartOffset = Ins[i].PartOffset;
5249 assert(ArgPartOffset == 0);
5250 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5251 CCValAssign &PartVA = ArgLocs[i + 1];
5252 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5253 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5254 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
5255 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5257 ++i;
5258 }
5259 continue;
5260 }
5261 InVals.push_back(ArgValue);
5262 }
5263
5264 if (IsVarArg) {
5265 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
5266 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5267 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
5268 MachineFrameInfo &MFI = MF.getFrameInfo();
5269 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5270 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
5271
5272 // Offset of the first variable argument from stack pointer, and size of
5273 // the vararg save area. For now, the varargs save area is either zero or
5274 // large enough to hold a0-a7.
5275 int VaArgOffset, VarArgsSaveSize;
5276
5277 // If all registers are allocated, then all varargs must be passed on the
5278 // stack and we don't need to save any argregs.
5279 if (ArgRegs.size() == Idx) {
5280 VaArgOffset = CCInfo.getStackSize();
5281 VarArgsSaveSize = 0;
5282 } else {
5283 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
5284 VaArgOffset = -VarArgsSaveSize;
5285 }
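    // Roughly, for example: if three of the eight argument GPRs were consumed
    // by fixed arguments on LA64, the save area holds the remaining five
    // registers (5 * 8 = 40 bytes) and starts at offset -40 relative to the
    // incoming stack pointer; if all eight were used, varargs live entirely on
    // the caller's stack.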
5286
5287 // Record the frame index of the first variable argument
5288 // which is a value necessary to VASTART.
5289 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5290 LoongArchFI->setVarArgsFrameIndex(FI);
5291
5292 // If saving an odd number of registers then create an extra stack slot to
5293 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
5294 // offsets to even-numbered registers remain 2*GRLen-aligned.
5295 if (Idx % 2) {
5296 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
5297 true);
5298 VarArgsSaveSize += GRLenInBytes;
5299 }
5300
5301 // Copy the integer registers that may have been used for passing varargs
5302 // to the vararg save area.
5303 for (unsigned I = Idx; I < ArgRegs.size();
5304 ++I, VaArgOffset += GRLenInBytes) {
5305 const Register Reg = RegInfo.createVirtualRegister(RC);
5306 RegInfo.addLiveIn(ArgRegs[I], Reg);
5307 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
5308 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5309 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5310 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5312 cast<StoreSDNode>(Store.getNode())
5313 ->getMemOperand()
5314 ->setValue((Value *)nullptr);
5315 OutChains.push_back(Store);
5316 }
5317 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
5318 }
5319
5320 // All stores are grouped in one node to allow the matching between
5321 // the size of Ins and InVals. This only happens for vararg functions.
5322 if (!OutChains.empty()) {
5323 OutChains.push_back(Chain);
5324 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
5325 }
5326
5327 return Chain;
5328}
5329
5330bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
5331 return CI->isTailCall();
5332}
5333
5334// Check that the return value is used only as the return value, as otherwise
5335// we can't perform a tail call.
5336bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
5337 SDValue &Chain) const {
5338 if (N->getNumValues() != 1)
5339 return false;
5340 if (!N->hasNUsesOfValue(1, 0))
5341 return false;
5342
5343 SDNode *Copy = *N->user_begin();
5344 if (Copy->getOpcode() != ISD::CopyToReg)
5345 return false;
5346
5347 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
5348 // isn't safe to perform a tail call.
5349 if (Copy->getGluedNode())
5350 return false;
5351
5352 // The copy must be used by a LoongArchISD::RET, and nothing else.
5353 bool HasRet = false;
5354 for (SDNode *Node : Copy->users()) {
5355 if (Node->getOpcode() != LoongArchISD::RET)
5356 return false;
5357 HasRet = true;
5358 }
5359
5360 if (!HasRet)
5361 return false;
5362
5363 Chain = Copy->getOperand(0);
5364 return true;
5365}
5366
5367// Check whether the call is eligible for tail call optimization.
5368bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
5369 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5370 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
5371
5372 auto CalleeCC = CLI.CallConv;
5373 auto &Outs = CLI.Outs;
5374 auto &Caller = MF.getFunction();
5375 auto CallerCC = Caller.getCallingConv();
5376
5377 // Do not tail call opt if the stack is used to pass parameters.
5378 if (CCInfo.getStackSize() != 0)
5379 return false;
5380
5381 // Do not tail call opt if any parameters need to be passed indirectly.
5382 for (auto &VA : ArgLocs)
5383 if (VA.getLocInfo() == CCValAssign::Indirect)
5384 return false;
5385
5386 // Do not tail call opt if either caller or callee uses struct return
5387 // semantics.
5388 auto IsCallerStructRet = Caller.hasStructRetAttr();
5389 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5390 if (IsCallerStructRet || IsCalleeStructRet)
5391 return false;
5392
5393 // Do not tail call opt if either the callee or caller has a byval argument.
5394 for (auto &Arg : Outs)
5395 if (Arg.Flags.isByVal())
5396 return false;
5397
5398 // The callee has to preserve all registers the caller needs to preserve.
5399 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
5400 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5401 if (CalleeCC != CallerCC) {
5402 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5403 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5404 return false;
5405 }
5406 return true;
5407}
5408
5409static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
5410 return DAG.getDataLayout().getPrefTypeAlign(
5411 VT.getTypeForEVT(*DAG.getContext()));
5412}
5413
5414// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5415// and output parameter nodes.
5416SDValue
5417LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
5418 SmallVectorImpl<SDValue> &InVals) const {
5419 SelectionDAG &DAG = CLI.DAG;
5420 SDLoc &DL = CLI.DL;
5421 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5422 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5423 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5424 SDValue Chain = CLI.Chain;
5425 SDValue Callee = CLI.Callee;
5426 CallingConv::ID CallConv = CLI.CallConv;
5427 bool IsVarArg = CLI.IsVarArg;
5428 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5429 MVT GRLenVT = Subtarget.getGRLenVT();
5430 bool &IsTailCall = CLI.IsTailCall;
5431
5432 MachineFunction &MF = DAG.getMachineFunction();
5433
5434 // Analyze the operands of the call, assigning locations to each operand.
5435 SmallVector<CCValAssign> ArgLocs;
5436 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5437
5438 if (CallConv == CallingConv::GHC)
5439 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
5440 else
5441 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
5442
5443 // Check if it's really possible to do a tail call.
5444 if (IsTailCall)
5445 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
5446
5447 if (IsTailCall)
5448 ++NumTailCalls;
5449 else if (CLI.CB && CLI.CB->isMustTailCall())
5450 report_fatal_error("failed to perform tail call elimination on a call "
5451 "site marked musttail");
5452
5453 // Get a count of how many bytes are to be pushed on the stack.
5454 unsigned NumBytes = ArgCCInfo.getStackSize();
5455
5456 // Create local copies for byval args.
5457 SmallVector<SDValue> ByValArgs;
5458 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5459 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5460 if (!Flags.isByVal())
5461 continue;
5462
5463 SDValue Arg = OutVals[i];
5464 unsigned Size = Flags.getByValSize();
5465 Align Alignment = Flags.getNonZeroByValAlign();
5466
5467 int FI =
5468 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
5469 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5470 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
5471
5472 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
5473 /*IsVolatile=*/false,
5474 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
5476 ByValArgs.push_back(FIPtr);
5477 }
5478
5479 if (!IsTailCall)
5480 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
5481
5482 // Copy argument values to their designated locations.
5483 SmallVector<std::pair<Register, SDValue>> RegsToPass;
5484 SmallVector<SDValue> MemOpChains;
5485 SDValue StackPtr;
5486 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5487 CCValAssign &VA = ArgLocs[i];
5488 SDValue ArgValue = OutVals[i];
5489 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5490
5491 // Promote the value if needed.
5492 // For now, only handle fully promoted and indirect arguments.
5493 if (VA.getLocInfo() == CCValAssign::Indirect) {
5494 // Store the argument in a stack slot and pass its address.
5495 Align StackAlign =
5496 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
5497 getPrefTypeAlign(ArgValue.getValueType(), DAG));
5498 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
5499 // If the original argument was split and passed by reference, we need to
5500 // store the required parts of it here (and pass just one address).
5501 unsigned ArgIndex = Outs[i].OrigArgIndex;
5502 unsigned ArgPartOffset = Outs[i].PartOffset;
5503 assert(ArgPartOffset == 0);
5504 // Calculate the total size to store. We don't have access to what we're
5505 // actually storing other than performing the loop and collecting the
5506 // info.
5507 SmallVector<std::pair<SDValue, SDValue>> Parts;
5508 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5509 SDValue PartValue = OutVals[i + 1];
5510 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
5511 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5512 EVT PartVT = PartValue.getValueType();
5513
5514 StoredSize += PartVT.getStoreSize();
5515 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
5516 Parts.push_back(std::make_pair(PartValue, Offset));
5517 ++i;
5518 }
5519 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
5520 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
5521 MemOpChains.push_back(
5522 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
5524 for (const auto &Part : Parts) {
5525 SDValue PartValue = Part.first;
5526 SDValue PartOffset = Part.second;
5527 SDValue Address =
5528 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
5529 MemOpChains.push_back(
5530 DAG.getStore(Chain, DL, PartValue, Address,
5532 }
5533 ArgValue = SpillSlot;
5534 } else {
5535 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5536 }
5537
5538 // Use local copy if it is a byval arg.
5539 if (Flags.isByVal())
5540 ArgValue = ByValArgs[j++];
5541
5542 if (VA.isRegLoc()) {
5543 // Queue up the argument copies and emit them at the end.
5544 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5545 } else {
5546 assert(VA.isMemLoc() && "Argument not register or memory");
5547 assert(!IsTailCall && "Tail call not allowed if stack is used "
5548 "for passing parameters");
5549
5550 // Work out the address of the stack slot.
5551 if (!StackPtr.getNode())
5552 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
5553 SDValue Address =
5554 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5555 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
5556
5557 // Emit the store.
5558 MemOpChains.push_back(
5559 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5560 }
5561 }
5562
5563 // Join the stores, which are independent of one another.
5564 if (!MemOpChains.empty())
5565 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5566
5567 SDValue Glue;
5568
5569 // Build a sequence of copy-to-reg nodes, chained and glued together.
5570 for (auto &Reg : RegsToPass) {
5571 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5572 Glue = Chain.getValue(1);
5573 }
5574
5575 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5576 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5577 // split it and then direct call can be matched by PseudoCALL.
5578 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5579 const GlobalValue *GV = S->getGlobal();
5580 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
5581 ? LoongArchII::MO_CALL
5582 : LoongArchII::MO_CALL_PLT;
5583 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
5584 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5585 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
5586 ? LoongArchII::MO_CALL
5587 : LoongArchII::MO_CALL_PLT;
5588 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5589 }
5590
5591 // The first call operand is the chain and the second is the target address.
5592 SmallVector<SDValue> Ops;
5593 Ops.push_back(Chain);
5594 Ops.push_back(Callee);
5595
5596 // Add argument registers to the end of the list so that they are
5597 // known live into the call.
5598 for (auto &Reg : RegsToPass)
5599 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5600
5601 if (!IsTailCall) {
5602 // Add a register mask operand representing the call-preserved registers.
5603 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5604 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5605 assert(Mask && "Missing call preserved mask for calling convention");
5606 Ops.push_back(DAG.getRegisterMask(Mask));
5607 }
5608
5609 // Glue the call to the argument copies, if any.
5610 if (Glue.getNode())
5611 Ops.push_back(Glue);
5612
5613 // Emit the call.
5614 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5615 unsigned Op;
5616 switch (DAG.getTarget().getCodeModel()) {
5617 default:
5618 report_fatal_error("Unsupported code model");
5619 case CodeModel::Small:
5620 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
5621 break;
5622 case CodeModel::Medium:
5623 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
5624 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
5625 break;
5626 case CodeModel::Large:
5627 assert(Subtarget.is64Bit() && "Large code model requires LA64");
5628 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
5629 break;
5630 }
5631
5632 if (IsTailCall) {
5633 MF.getFrameInfo().setHasTailCall();
5634 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
5635 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
5636 return Ret;
5637 }
5638
5639 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
5640 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5641 Glue = Chain.getValue(1);
5642
5643 // Mark the end of the call, which is glued to the call itself.
5644 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
5645 Glue = Chain.getValue(1);
5646
5647 // Assign locations to each value returned by this call.
5648 SmallVector<CCValAssign> RVLocs;
5649 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5650 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
5651
5652 // Copy all of the result registers out of their specified physreg.
5653 for (auto &VA : RVLocs) {
5654 // Copy the value out.
5655 SDValue RetValue =
5656 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5657 // Glue the RetValue to the end of the call sequence.
5658 Chain = RetValue.getValue(1);
5659 Glue = RetValue.getValue(2);
5660
5661 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5662
5663 InVals.push_back(RetValue);
5664 }
5665
5666 return Chain;
5667}
5668
5669bool LoongArchTargetLowering::CanLowerReturn(
5670 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5671 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5672 SmallVector<CCValAssign> RVLocs;
5673 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5674
5675 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5676 LoongArchABI::ABI ABI =
5677 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5678 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
5679 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
5680 nullptr))
5681 return false;
5682 }
5683 return true;
5684}
5685
5686SDValue LoongArchTargetLowering::LowerReturn(
5687 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5688 const SmallVectorImpl<ISD::OutputArg> &Outs,
5689 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
5690 SelectionDAG &DAG) const {
5691 // Stores the assignment of the return value to a location.
5692 SmallVector<CCValAssign> RVLocs;
5693
5694 // Info about the registers and stack slot.
5695 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5696 *DAG.getContext());
5697
5698 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5699 nullptr, CC_LoongArch);
5700 if (CallConv == CallingConv::GHC && !RVLocs.empty())
5701 report_fatal_error("GHC functions return void only");
5702 SDValue Glue;
5703 SmallVector<SDValue, 4> RetOps(1, Chain);
5704
5705 // Copy the result values into the output registers.
5706 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5707 CCValAssign &VA = RVLocs[i];
5708 assert(VA.isRegLoc() && "Can only return in registers!");
5709
5710 // Handle a 'normal' return.
5711 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
5712 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5713
5714 // Guarantee that all emitted copies are stuck together.
5715 Glue = Chain.getValue(1);
5716 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5717 }
5718
5719 RetOps[0] = Chain; // Update chain.
5720
5721 // Add the glue node if we have it.
5722 if (Glue.getNode())
5723 RetOps.push_back(Glue);
5724
5725 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
5726}
5727
5728bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
5729 EVT VT) const {
5730 if (!Subtarget.hasExtLSX())
5731 return false;
5732
5733 if (VT == MVT::f32) {
5734 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
5735 return (masked == 0x3e000000 || masked == 0x40000000);
5736 }
5737
5738 if (VT == MVT::f64) {
5739 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
5740 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
5741 }
5742
5743 return false;
5744}
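// For illustration (hedged): with LSX enabled, simple constants such as
// +/-1.0 and +/-2.0 (in f32 or f64) satisfy the masks above and can be
// materialised with a single vldi immediate, so isFPImmLegal below treats
// them as legal.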
5745
5746bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5747 bool ForCodeSize) const {
5748 // TODO: Maybe need more checks here after vector extension is supported.
5749 if (VT == MVT::f32 && !Subtarget.hasBasicF())
5750 return false;
5751 if (VT == MVT::f64 && !Subtarget.hasBasicD())
5752 return false;
5753 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
5754}
5755
5757 return true;
5758}
5759
5761 return true;
5762}
5763
5764bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
5765 const Instruction *I) const {
5766 if (!Subtarget.is64Bit())
5767 return isa<LoadInst>(I) || isa<StoreInst>(I);
5768
5769 if (isa<LoadInst>(I))
5770 return true;
5771
5772 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
5773 // require fences because we can use amswap_db.[w/d].
5774 Type *Ty = I->getOperand(0)->getType();
5775 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
5776 unsigned Size = Ty->getIntegerBitWidth();
5777 return (Size == 8 || Size == 16);
5778 }
5779
5780 return false;
5781}
5782
5783EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
5784 LLVMContext &Context,
5785 EVT VT) const {
5786 if (!VT.isVector())
5787 return getPointerTy(DL);
5788 return VT.changeVectorElementTypeToInteger();
5789}
5790
5792 // TODO: Support vectors.
5793 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
5794}
5795
5796bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5797 const CallInst &I,
5798 MachineFunction &MF,
5799 unsigned Intrinsic) const {
5800 switch (Intrinsic) {
5801 default:
5802 return false;
5803 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
5804 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
5805 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
5806 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
5807 Info.opc = ISD::INTRINSIC_W_CHAIN;
5808 Info.memVT = MVT::i32;
5809 Info.ptrVal = I.getArgOperand(0);
5810 Info.offset = 0;
5811 Info.align = Align(4);
5812 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5813 MachineMemOperand::MOVolatile;
5814 return true;
5815 // TODO: Add more Intrinsics later.
5816 }
5817}
5818
5819// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
5820// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
5821// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
5822// regression, we need to implement it manually.
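// Sketch of the transform performed below for an i8/i16 atomicrmw and/or/xor
// when -mlamcas is in effect (names are those used in this function): the
// address is rounded down to a 4-byte boundary (AlignedAddr), the operand is
// shifted into position (ValOperand_Shifted) and, for 'and', merged with the
// inverted byte mask so untouched bytes stay set; a 32-bit atomicrmw is then
// issued and the original byte/halfword is shifted and truncated back out of
// the result.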
5825
5827 Op == AtomicRMWInst::And) &&
5828 "Unable to expand");
5829 unsigned MinWordSize = 4;
5830
5831 IRBuilder<> Builder(AI);
5832 LLVMContext &Ctx = Builder.getContext();
5833 const DataLayout &DL = AI->getDataLayout();
5834 Type *ValueType = AI->getType();
5835 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
5836
5837 Value *Addr = AI->getPointerOperand();
5838 PointerType *PtrTy = cast<PointerType>(Addr->getType());
5839 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
5840
5841 Value *AlignedAddr = Builder.CreateIntrinsic(
5842 Intrinsic::ptrmask, {PtrTy, IntTy},
5843 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
5844 "AlignedAddr");
5845
5846 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
5847 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
5848 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
5849 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
5850 Value *Mask = Builder.CreateShl(
5851 ConstantInt::get(WordType,
5852 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
5853 ShiftAmt, "Mask");
5854 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
5855 Value *ValOperand_Shifted =
5856 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
5857 ShiftAmt, "ValOperand_Shifted");
5858 Value *NewOperand;
5859 if (Op == AtomicRMWInst::And)
5860 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
5861 else
5862 NewOperand = ValOperand_Shifted;
5863
5864 AtomicRMWInst *NewAI =
5865 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
5866 AI->getOrdering(), AI->getSyncScopeID());
5867
5868 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
5869 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
5870 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
5871 AI->replaceAllUsesWith(FinalOldResult);
5872 AI->eraseFromParent();
5873}
5874
5875TargetLowering::AtomicExpansionKind
5876LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
5877 // TODO: Add more AtomicRMWInst that need to be extended.
5878
5879 // Since a floating-point operation requires a non-trivial set of data
5880 // operations, use CmpXChg to expand.
5881 if (AI->isFloatingPointOperation() ||
5887
5888 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
5889 (AI->getOperation() == AtomicRMWInst::Xchg ||
5890 AI->getOperation() == AtomicRMWInst::Add ||
5891 AI->getOperation() == AtomicRMWInst::Sub)) {
5892 return AtomicExpansionKind::None;
5893 }
5894
5895 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
5896 if (Subtarget.hasLAMCAS()) {
5897 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
5898 AI->getOperation() == AtomicRMWInst::Or ||
5899 AI->getOperation() == AtomicRMWInst::Xor))
5900 return AtomicExpansionKind::Expand;
5901 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
5902 return AtomicExpansionKind::CmpXChg;
5903 }
5904
5905 if (Size == 8 || Size == 16)
5906 return AtomicExpansionKind::MaskedIntrinsic;
5907 return AtomicExpansionKind::None;
5908}
5909
5910static Intrinsic::ID
5911getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
5912 AtomicRMWInst::BinOp BinOp) {
5913 if (GRLen == 64) {
5914 switch (BinOp) {
5915 default:
5916 llvm_unreachable("Unexpected AtomicRMW BinOp");
5917 case AtomicRMWInst::Xchg:
5918 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
5919 case AtomicRMWInst::Add:
5920 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
5921 case AtomicRMWInst::Sub:
5922 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
5923 case AtomicRMWInst::Nand:
5924 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
5925 case AtomicRMWInst::UMax:
5926 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
5927 case AtomicRMWInst::UMin:
5928 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
5929 case AtomicRMWInst::Max:
5930 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
5931 case AtomicRMWInst::Min:
5932 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
5933 // TODO: support other AtomicRMWInst.
5934 }
5935 }
5936
5937 if (GRLen == 32) {
5938 switch (BinOp) {
5939 default:
5940 llvm_unreachable("Unexpected AtomicRMW BinOp");
5941 case AtomicRMWInst::Xchg:
5942 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
5943 case AtomicRMWInst::Add:
5944 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
5945 case AtomicRMWInst::Sub:
5946 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
5947 case AtomicRMWInst::Nand:
5948 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
5949 // TODO: support other AtomicRMWInst.
5950 }
5951 }
5952
5953 llvm_unreachable("Unexpected GRLen\n");
5954}
5955
5956TargetLowering::AtomicExpansionKind
5957LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
5958 AtomicCmpXchgInst *CI) const {
5959
5960 if (Subtarget.hasLAMCAS())
5961 return AtomicExpansionKind::None;
5962
5963 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
5964 if (Size == 8 || Size == 16)
5965 return AtomicExpansionKind::MaskedIntrinsic;
5966 return AtomicExpansionKind::None;
5967}
5968
5969Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
5970 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
5971 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
5972 AtomicOrdering FailOrd = CI->getFailureOrdering();
5973 Value *FailureOrdering =
5974 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
5975
5976 // TODO: Support cmpxchg on LA32.
5977 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
5978 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
5979 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
5980 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5981 Type *Tys[] = {AlignedAddr->getType()};
5982 Value *Result = Builder.CreateIntrinsic(
5983 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
5984 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5985 return Result;
5986}
5987
5988Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
5989 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
5990 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
5991 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
5992 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
5993 // mask, as this produces better code than the LL/SC loop emitted by
5994 // int_loongarch_masked_atomicrmw_xchg.
5995 if (AI->getOperation() == AtomicRMWInst::Xchg &&
5996 isa<ConstantInt>(AI->getValOperand())) {
5997 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
5998 if (CVal->isZero())
5999 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
6000 Builder.CreateNot(Mask, "Inv_Mask"),
6001 AI->getAlign(), Ord);
6002 if (CVal->isMinusOne())
6003 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
6004 AI->getAlign(), Ord);
6005 }
6006
6007 unsigned GRLen = Subtarget.getGRLen();
6008 Value *Ordering =
6009 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
6010 Type *Tys[] = {AlignedAddr->getType()};
6012 AI->getModule(),
6014
6015 if (GRLen == 64) {
6016 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
6017 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
6018 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
6019 }
6020
6021 Value *Result;
6022
6023 // Must pass the shift amount needed to sign extend the loaded value prior
6024 // to performing a signed comparison for min/max. ShiftAmt is the number of
6025 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
6026 // is the number of bits to left+right shift the value in order to
6027 // sign-extend.
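  // For example (roughly): for an i8 value located at byte offset 2 within its
  // aligned word on LA64, ShiftAmt is 16 and ValWidth is 8, so SextShamt below
  // is 64 - 16 - 8 = 40; the loaded word is shifted left by 40 and
  // arithmetically shifted right by 40 to sign-extend the i8 before the signed
  // min/max comparison.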
6028 if (AI->getOperation() == AtomicRMWInst::Min ||
6030 const DataLayout &DL = AI->getDataLayout();
6031 unsigned ValWidth =
6032 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
6033 Value *SextShamt =
6034 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
6035 Result = Builder.CreateCall(LlwOpScwLoop,
6036 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
6037 } else {
6038 Result =
6039 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
6040 }
6041
6042 if (GRLen == 64)
6043 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
6044 return Result;
6045}
6046
6047bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
6048 const MachineFunction &MF, EVT VT) const {
6049 VT = VT.getScalarType();
6050
6051 if (!VT.isSimple())
6052 return false;
6053
6054 switch (VT.getSimpleVT().SimpleTy) {
6055 case MVT::f32:
6056 case MVT::f64:
6057 return true;
6058 default:
6059 break;
6060 }
6061
6062 return false;
6063}
6064
6065Register LoongArchTargetLowering::getExceptionPointerRegister(
6066 const Constant *PersonalityFn) const {
6067 return LoongArch::R4;
6068}
6069
6070Register LoongArchTargetLowering::getExceptionSelectorRegister(
6071 const Constant *PersonalityFn) const {
6072 return LoongArch::R5;
6073}
6074
6075//===----------------------------------------------------------------------===//
6076// Target Optimization Hooks
6077//===----------------------------------------------------------------------===//
6078
6079static int getEstimateRefinementSteps(EVT VT,
6080 const LoongArchSubtarget &Subtarget) {
6081 // The FRECIPE feature's estimate instructions have a relative accuracy of 2^-14.
6082 // IEEE single precision has 23 mantissa bits and double precision has 52.
6083 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
6084 return RefinementSteps;
6085}
6086
6087SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
6088 SelectionDAG &DAG, int Enabled,
6089 int &RefinementSteps,
6090 bool &UseOneConstNR,
6091 bool Reciprocal) const {
6092 if (Subtarget.hasFrecipe()) {
6093 SDLoc DL(Operand);
6094 EVT VT = Operand.getValueType();
6095
6096 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6097 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6098 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6099 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6100 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6101
6102 if (RefinementSteps == ReciprocalEstimate::Unspecified)
6103 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6104
6105 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
6106 if (Reciprocal)
6107 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
6108
6109 return Estimate;
6110 }
6111 }
6112
6113 return SDValue();
6114}
6115
6116SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
6117 SelectionDAG &DAG,
6118 int Enabled,
6119 int &RefinementSteps) const {
6120 if (Subtarget.hasFrecipe()) {
6121 SDLoc DL(Operand);
6122 EVT VT = Operand.getValueType();
6123
6124 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6125 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6126 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6127 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6128 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6129
6130 if (RefinementSteps == ReciprocalEstimate::Unspecified)
6131 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6132
6133 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
6134 }
6135 }
6136
6137 return SDValue();
6138}
6139
6140//===----------------------------------------------------------------------===//
6141// LoongArch Inline Assembly Support
6142//===----------------------------------------------------------------------===//
6143
6144LoongArchTargetLowering::ConstraintType
6145LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
6146 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
6147 //
6148 // 'f': A floating-point register (if available).
6149 // 'k': A memory operand whose address is formed by a base register and
6150 // (optionally scaled) index register.
6151 // 'l': A signed 16-bit constant.
6152 // 'm': A memory operand whose address is formed by a base register and
6153 // offset that is suitable for use in instructions with the same
6154 // addressing mode as st.w and ld.w.
6155 // 'I': A signed 12-bit constant (for arithmetic instructions).
6156 // 'J': Integer zero.
6157 // 'K': An unsigned 12-bit constant (for logic instructions).
6158 // "ZB": An address that is held in a general-purpose register. The offset is
6159 // zero.
6160 // "ZC": A memory operand whose address is formed by a base register and
6161 // offset that is suitable for use in instructions with the same
6162 // addressing mode as ll.w and sc.w.
6163 if (Constraint.size() == 1) {
6164 switch (Constraint[0]) {
6165 default:
6166 break;
6167 case 'f':
6168 return C_RegisterClass;
6169 case 'l':
6170 case 'I':
6171 case 'J':
6172 case 'K':
6173 return C_Immediate;
6174 case 'k':
6175 return C_Memory;
6176 }
6177 }
6178
6179 if (Constraint == "ZC" || Constraint == "ZB")
6180 return C_Memory;
6181
6182 // 'm' is handled here.
6183 return TargetLowering::getConstraintType(Constraint);
6184}
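// For illustration (a hypothetical user-side example, not part of this file's
// API): inline assembly such as
//   int res;
//   asm("addi.w %0, %1, %2" : "=r"(res) : "r"(a), "I"(12));
// uses 'I' for a signed 12-bit immediate; 'K' would likewise accept an
// unsigned 12-bit immediate for logic instructions such as andi/ori.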
6185
6186InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
6187 StringRef ConstraintCode) const {
6188 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
6189 .Case("k", InlineAsm::ConstraintCode::k)
6190 .Case("ZB", InlineAsm::ConstraintCode::ZB)
6191 .Case("ZC", InlineAsm::ConstraintCode::ZC)
6192 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
6193}
6194
6195std::pair<unsigned, const TargetRegisterClass *>
6196LoongArchTargetLowering::getRegForInlineAsmConstraint(
6197 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
6198 // First, see if this is a constraint that directly corresponds to a LoongArch
6199 // register class.
6200 if (Constraint.size() == 1) {
6201 switch (Constraint[0]) {
6202 case 'r':
6203 // TODO: Support fixed vectors up to GRLen?
6204 if (VT.isVector())
6205 break;
6206 return std::make_pair(0U, &LoongArch::GPRRegClass);
6207 case 'f':
6208 if (Subtarget.hasBasicF() && VT == MVT::f32)
6209 return std::make_pair(0U, &LoongArch::FPR32RegClass);
6210 if (Subtarget.hasBasicD() && VT == MVT::f64)
6211 return std::make_pair(0U, &LoongArch::FPR64RegClass);
6212 if (Subtarget.hasExtLSX() &&
6213 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
6214 return std::make_pair(0U, &LoongArch::LSX128RegClass);
6215 if (Subtarget.hasExtLASX() &&
6216 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
6217 return std::make_pair(0U, &LoongArch::LASX256RegClass);
6218 break;
6219 default:
6220 break;
6221 }
6222 }
6223
6224 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
6225 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
6226 // constraints while the official register name is prefixed with a '$'. So we
6227 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
6228 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
6229 // case insensitive, so no need to convert the constraint to upper case here.
6230 //
6231 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
6232 // decode the usage of register name aliases into their official names. And
6233 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
6234 // official register names.
6235 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
6236 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
6237 bool IsFP = Constraint[2] == 'f';
6238 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
6239 std::pair<unsigned, const TargetRegisterClass *> R;
6240 R = TargetLowering::getRegForInlineAsmConstraint(
6241 TRI, join_items("", Temp.first, Temp.second), VT);
6242 // Match those names to the widest floating point register type available.
6243 if (IsFP) {
6244 unsigned RegNo = R.first;
6245 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
6246 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
6247 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
6248 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
6249 }
6250 }
6251 }
6252 return R;
6253 }
6254
6255 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
6256}
6257
6258void LoongArchTargetLowering::LowerAsmOperandForConstraint(
6259 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
6260 SelectionDAG &DAG) const {
6261 // Currently only support length 1 constraints.
6262 if (Constraint.size() == 1) {
6263 switch (Constraint[0]) {
6264 case 'l':
6265 // Validate & create a 16-bit signed immediate operand.
6266 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6267 uint64_t CVal = C->getSExtValue();
6268 if (isInt<16>(CVal))
6269 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6270 Subtarget.getGRLenVT()));
6271 }
6272 return;
6273 case 'I':
6274 // Validate & create a 12-bit signed immediate operand.
6275 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6276 uint64_t CVal = C->getSExtValue();
6277 if (isInt<12>(CVal))
6278 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6279 Subtarget.getGRLenVT()));
6280 }
6281 return;
6282 case 'J':
6283 // Validate & create an integer zero operand.
6284 if (auto *C = dyn_cast<ConstantSDNode>(Op))
6285 if (C->getZExtValue() == 0)
6286 Ops.push_back(
6287 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
6288 return;
6289 case 'K':
6290 // Validate & create a 12-bit unsigned immediate operand.
6291 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6292 uint64_t CVal = C->getZExtValue();
6293 if (isUInt<12>(CVal))
6294 Ops.push_back(
6295 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
6296 }
6297 return;
6298 default:
6299 break;
6300 }
6301 }
6302 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
6303}
6304
6305#define GET_REGISTER_MATCHER
6306#include "LoongArchGenAsmMatcher.inc"
6307
6310 const MachineFunction &MF) const {
6311 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
6312 std::string NewRegName = Name.second.str();
6313 Register Reg = MatchRegisterAltName(NewRegName);
6314 if (Reg == LoongArch::NoRegister)
6315 Reg = MatchRegisterName(NewRegName);
6316 if (Reg == LoongArch::NoRegister)
6318 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
6319 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
6320 if (!ReservedRegs.test(Reg))
6321 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
6322 StringRef(RegName) + "\"."));
6323 return Reg;
6324}
6325
6326bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
6327 EVT VT, SDValue C) const {
6328 // TODO: Support vectors.
6329 if (!VT.isScalarInteger())
6330 return false;
6331
6332 // Omit the optimization if the data size exceeds GRLen.
6333 if (VT.getSizeInBits() > Subtarget.getGRLen())
6334 return false;
6335
6336 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
6337 const APInt &Imm = ConstNode->getAPIntValue();
6338 // Break MUL into (SLLI + ADD/SUB) or ALSL.
6339 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
6340 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
6341 return true;
6342 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
6343 if (ConstNode->hasOneUse() &&
6344 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
6345 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
6346 return true;
6347 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
6348 // in which the immediate has two set bits, or break (MUL x, imm)
6349 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
6350 // equals (1 << s0) - (1 << s1).
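    // For example, x * 4160 (two set bits: 4096 + 64) can become
    // (ADD (SLLI x, 12), (SLLI x, 6)), and the earlier rules turn x * 5 into a
    // single alsl (x + (x << 2)); both avoid materialising the constant and
    // issuing a mul instruction.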
6351 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
6352 unsigned Shifts = Imm.countr_zero();
6353 // Reject immediates which can be composed via a single LUI.
6354 if (Shifts >= 12)
6355 return false;
6356 // Reject multiplications that can be optimized to
6357 // (SLLI (ALSL x, x, 1/2/3/4), s).
6358 APInt ImmPop = Imm.ashr(Shifts);
6359 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
6360 return false;
6361 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
6362 // since it needs one more instruction than the other 3 cases.
6363 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
6364 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
6365 (ImmSmall - Imm).isPowerOf2())
6366 return true;
6367 }
6368 }
6369
6370 return false;
6371}
6372
6373bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
6374 const AddrMode &AM,
6375 Type *Ty, unsigned AS,
6376 Instruction *I) const {
6377 // LoongArch has four basic addressing modes:
6378 // 1. reg
6379 // 2. reg + 12-bit signed offset
6380 // 3. reg + 14-bit signed offset left-shifted by 2
6381 // 4. reg1 + reg2
6382 // TODO: Add more checks after the vector extension is supported.
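  // Roughly, mode 2 corresponds to instructions such as ld.w/st.w with a si12
  // offset, mode 3 to ldptr.w/stptr.w with a si14<<2 offset, and mode 4 to the
  // indexed forms ldx.w/stx.w.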
6383
6384 // No global is ever allowed as a base.
6385 if (AM.BaseGV)
6386 return false;
6387
6388 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
6389 // with `UAL` feature.
6390 if (!isInt<12>(AM.BaseOffs) &&
6391 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
6392 return false;
6393
6394 switch (AM.Scale) {
6395 case 0:
6396 // "r+i" or just "i", depending on HasBaseReg.
6397 break;
6398 case 1:
6399 // "r+r+i" is not allowed.
6400 if (AM.HasBaseReg && AM.BaseOffs)
6401 return false;
6402 // Otherwise we have "r+r" or "r+i".
6403 break;
6404 case 2:
6405 // "2*r+r" or "2*r+i" is not allowed.
6406 if (AM.HasBaseReg || AM.BaseOffs)
6407 return false;
6408 // Allow "2*r" as "r+r".
6409 break;
6410 default:
6411 return false;
6412 }
6413
6414 return true;
6415}
6416
6418 return isInt<12>(Imm);
6419}
6420
6422 return isInt<12>(Imm);
6423}
6424
6425bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
6426 // Zexts are free if they can be combined with a load.
6427 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
6428 // poorly with type legalization of compares preferring sext.
6429 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
6430 EVT MemVT = LD->getMemoryVT();
6431 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
6432 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
6433 LD->getExtensionType() == ISD::ZEXTLOAD))
6434 return true;
6435 }
6436
6437 return TargetLowering::isZExtFree(Val, VT2);
6438}
6439
6440bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
6441 EVT DstVT) const {
6442 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
6443}
6444
6445bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
6446 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
6447}
6448
6450 // TODO: Support vectors.
6451 if (Y.getValueType().isVector())
6452 return false;
6453
6454 return !isa<ConstantSDNode>(Y);
6455}
6456
6457ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
6458 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
6459 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
6460}
6461
6462bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
6463 Type *Ty, bool IsSigned) const {
6464 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
6465 return true;
6466
6467 return IsSigned;
6468}
6469
6470bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
6471 // Return false to suppress the unnecessary extensions if the LibCall
6472 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
6473 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
6474 Type.getSizeInBits() < Subtarget.getGRLen()))
6475 return false;
6476 return true;
6477}
6478
6479// memcpy, and other memory intrinsics, typically try to use wider load/store
6480// if the source/dest is aligned and the copy size is large enough. We therefore
6481// want to align such objects passed to memory intrinsics.
6482bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
6483 unsigned &MinSize,
6484 Align &PrefAlign) const {
6485 if (!isa<MemIntrinsic>(CI))
6486 return false;
6487
6488 if (Subtarget.is64Bit()) {
6489 MinSize = 8;
6490 PrefAlign = Align(8);
6491 } else {
6492 MinSize = 4;
6493 PrefAlign = Align(4);
6494 }
6495
6496 return true;
6497}
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static bool isConstantOrUndef(const SDValue Op)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file contains some functions that are useful when dealing with strings.
Class for arbitrary precision integers.
Definition: APInt.h:78
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Definition: Instructions.h:594
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:827
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:734
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ Xor
*p = old ^ v
Definition: Instructions.h:730
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:732
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:738
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:736
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
Value * getPointerOperand()
Definition: Instructions.h:870
bool isFloatingPointOperation() const
Definition: Instructions.h:882
BinOp getOperation() const
Definition: Instructions.h:805
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
Definition: Instructions.h:861
Value * getValOperand()
Definition: Instructions.h:874
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:847
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
bool test(unsigned Idx) const
Definition: BitVector.h:461
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:364
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
Argument * getArg(unsigned i) const
Definition: Function.h:886
bool isDSOLocal() const
Definition: GlobalValue.h:305
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2060
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1460
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:523
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:528
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1772
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1367
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2155
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:494
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1439
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2048
LLVMContext & getContext() const
Definition: IRBuilder.h:173
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1498
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2145
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2444
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1877
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2034
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1520
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
unsigned getMaxBytesForAlignment() const
Align getPrefFunctionAlignment() const
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
Class to represent pointers.
Definition: DerivedTypes.h:670
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:703
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
size_t use_size() const
Return the number of uses of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:748
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:799
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:501
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:854
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:825
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:495
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:710
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:496
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:794
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:765
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:578
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:578
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:700
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ Entry
Definition: COFF.h:844
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1613
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1593
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
ABI getTargetABI(StringRef ABIName)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:285
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
@ None
Definition: CodeGenData.h:106
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:193
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:212
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Register getFrameRegister(const MachineFunction &MF) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)