clang 3.8.0
CGBuiltin.cpp
1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeGenFunction.h"
15 #include "CGCXXABI.h"
16 #include "CGObjCRuntime.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/ASTContext.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/Basic/TargetBuiltins.h"
22 #include "clang/Basic/TargetInfo.h"
23 #include "clang/CodeGen/CGFunctionInfo.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/InlineAsm.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include <sstream>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 using namespace llvm;
34 
35 /// getBuiltinLibFunction - Given a builtin id for a function like
36 /// "__builtin_fabsf", return a Function* for "fabsf".
37 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
38  unsigned BuiltinID) {
39  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
40 
41  // Get the name, skip over the __builtin_ prefix (if necessary).
42  StringRef Name;
43  GlobalDecl D(FD);
44 
45  // If the builtin has been declared explicitly with an assembler label,
46  // use the mangled name. This differs from the plain label on platforms
47  // that prefix labels.
48  if (FD->hasAttr<AsmLabelAttr>())
49  Name = getMangledName(D);
50  else
51  Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
52 
53  llvm::FunctionType *Ty =
54  cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
55 
56  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
57 }
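For example, the builtin id for __builtin_fabsf resolves here to a declaration of plain fabsf: getName(BuiltinID) yields the string "__builtin_fabsf", and the "+ 10" above skips its 10-character "__builtin_" prefix.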
58 
59 /// Emit the conversions required to turn the given value into an
60 /// integer of the given size.
61 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
62  QualType T, llvm::IntegerType *IntType) {
63  V = CGF.EmitToMemory(V, T);
64 
65  if (V->getType()->isPointerTy())
66  return CGF.Builder.CreatePtrToInt(V, IntType);
67 
68  assert(V->getType() == IntType);
69  return V;
70 }
71 
72 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
73  QualType T, llvm::Type *ResultType) {
74  V = CGF.EmitFromMemory(V, T);
75 
76  if (ResultType->isPointerTy())
77  return CGF.Builder.CreateIntToPtr(V, ResultType);
78 
79  assert(V->getType() == ResultType);
80  return V;
81 }
82 
83 /// Utility to insert an atomic instruction based on Intrinsic::ID
84 /// and the expression node.
85 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
86  llvm::AtomicRMWInst::BinOp Kind,
87  const CallExpr *E) {
88  QualType T = E->getType();
89  assert(E->getArg(0)->getType()->isPointerType());
90  assert(CGF.getContext().hasSameUnqualifiedType(T,
91  E->getArg(0)->getType()->getPointeeType()));
92  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
93 
94  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
95  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
96 
97  llvm::IntegerType *IntType =
98  llvm::IntegerType::get(CGF.getLLVMContext(),
99  CGF.getContext().getTypeSize(T));
100  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
101 
102  llvm::Value *Args[2];
103  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
104  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
105  llvm::Type *ValueType = Args[1]->getType();
106  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
107 
108  llvm::Value *Result =
109  CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
110  llvm::SequentiallyConsistent);
111  return EmitFromInt(CGF, Result, T, ValueType);
112 }
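Caller-side behaviour of the helper above, as a minimal sketch (assumes a clang/gcc-style compiler; the variable names are ours, not from this file):

  #include <cstdio>
  int main() {
    int x = 1;
    // Lowers to: atomicrmw add i32* %x, i32 2 seq_cst (returns the pre-add value).
    int before = __sync_fetch_and_add(&x, 2);
    std::printf("%d %d\n", before, x); // prints: 1 3
  }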
113 
114 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
115  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
116  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
117 
118  // Convert the type of the pointer to a pointer to the stored type.
119  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
120  Value *BC = CGF.Builder.CreateBitCast(
121  Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
122  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
123  LV.setNontemporal(true);
124  CGF.EmitStoreOfScalar(Val, LV, false);
125  return nullptr;
126 }
127 
128 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
129  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
130 
131  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
132  LV.setNontemporal(true);
133  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
134 }
135 
136 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
137  llvm::AtomicRMWInst::BinOp Kind,
138  const CallExpr *E) {
139  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
140 }
141 
142 /// Utility to insert an atomic instruction based on Intrinsic::ID and
143 /// the expression node, where the return value is the result of the
144 /// operation.
145 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
146  llvm::AtomicRMWInst::BinOp Kind,
147  const CallExpr *E,
148  Instruction::BinaryOps Op,
149  bool Invert = false) {
150  QualType T = E->getType();
151  assert(E->getArg(0)->getType()->isPointerType());
152  assert(CGF.getContext().hasSameUnqualifiedType(T,
153  E->getArg(0)->getType()->getPointeeType()));
154  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
155 
156  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
157  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
158 
159  llvm::IntegerType *IntType =
160  llvm::IntegerType::get(CGF.getLLVMContext(),
161  CGF.getContext().getTypeSize(T));
162  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
163 
164  llvm::Value *Args[2];
165  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
166  llvm::Type *ValueType = Args[1]->getType();
167  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
168  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
169 
170  llvm::Value *Result =
171  CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
172  llvm::SequentiallyConsistent);
173  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
174  if (Invert)
175  Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
176  llvm::ConstantInt::get(IntType, -1));
177  Result = EmitFromInt(CGF, Result, T, ValueType);
178  return RValue::get(Result);
179 }
180 
181 /// @brief Utility to insert an atomic cmpxchg instruction.
182 ///
183 /// @param CGF The current codegen function.
184 /// @param E Builtin call expression to convert to cmpxchg.
185 /// arg0 - address to operate on
186 /// arg1 - value to compare with
187 /// arg2 - new value
188 /// @param ReturnBool Specifies whether to return success flag of
189 /// cmpxchg result or the old value.
190 ///
191 /// @returns result of cmpxchg, according to ReturnBool
192 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
193  bool ReturnBool) {
194  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
195  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
196  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
197 
198  llvm::IntegerType *IntType = llvm::IntegerType::get(
199  CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
200  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
201 
202  Value *Args[3];
203  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
204  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
205  llvm::Type *ValueType = Args[1]->getType();
206  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
207  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
208 
209  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
210  llvm::SequentiallyConsistent,
211  llvm::SequentiallyConsistent);
212  if (ReturnBool)
213  // Extract boolean success flag and zext it to int.
214  return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
215  CGF.ConvertType(E->getType()));
216  else
217  // Extract old value and emit it using the same type as compare value.
218  return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
219  ValueType);
220 }
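Both modes from the caller's side, as a sketch (assumes a clang/gcc-style compiler; names are ours):

  #include <cstdio>
  int main() {
    int x = 5;
    int old = __sync_val_compare_and_swap(&x, 5, 7);  // ReturnBool=false: yields the old value (5)
    bool ok = __sync_bool_compare_and_swap(&x, 7, 9); // ReturnBool=true: yields the success flag
    std::printf("%d %d %d\n", old, (int)ok, x);       // prints: 5 1 9
  }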
221 
222 /// EmitFAbs - Emit a call to @llvm.fabs().
223 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
224  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
225  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
226  Call->setDoesNotAccessMemory();
227  return Call;
228 }
229 
230 /// Emit the computation of the sign bit for a floating point value. Returns
231 /// the i1 sign bit value.
232 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
233  LLVMContext &C = CGF.CGM.getLLVMContext();
234 
235  llvm::Type *Ty = V->getType();
236  int Width = Ty->getPrimitiveSizeInBits();
237  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
238  V = CGF.Builder.CreateBitCast(V, IntTy);
239  if (Ty->isPPC_FP128Ty()) {
240  // We want the sign bit of the higher-order double. The bitcast we just
241  // did works as if the double-double was stored to memory and then
242  // read as an i128. The "store" will put the higher-order double in the
243  // lower address in both little- and big-Endian modes, but the "load"
244  // will treat those bits as a different part of the i128: the low bits in
245  // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
246  // we need to shift the high bits down to the low before truncating.
247  Width >>= 1;
248  if (CGF.getTarget().isBigEndian()) {
249  Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
250  V = CGF.Builder.CreateLShr(V, ShiftCst);
251  }
252  // We are truncating value in order to extract the higher-order
253  // double, which we will be using to extract the sign from.
254  IntTy = llvm::IntegerType::get(C, Width);
255  V = CGF.Builder.CreateTrunc(V, IntTy);
256  }
257  Value *Zero = llvm::Constant::getNullValue(IntTy);
258  return CGF.Builder.CreateICmpSLT(V, Zero);
259 }
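A host-side analogue of the common (non-PPC_FP128) path, as a minimal sketch assuming an IEEE-754 double; the name sign_bit is ours, not from this file:

  #include <cstdint>
  #include <cstring>
  #include <cstdio>
  static bool sign_bit(double d) {
    uint64_t bits;
    std::memcpy(&bits, &d, sizeof bits);    // plays the role of the bitcast above
    return static_cast<int64_t>(bits) < 0;  // the CreateICmpSLT against zero
  }
  int main() { std::printf("%d %d\n", (int)sign_bit(-0.0), (int)sign_bit(3.5)); } // prints: 1 0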
260 
261 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
262  const CallExpr *E, llvm::Value *calleeValue) {
263  return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
264  ReturnValueSlot(), Fn);
265 }
266 
267 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
268 /// depending on IntrinsicID.
269 ///
270 /// \arg CGF The current codegen function.
271 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
272 /// \arg X The first argument to the llvm.*.with.overflow.*.
273 /// \arg Y The second argument to the llvm.*.with.overflow.*.
274 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
275 /// \returns The result (i.e. sum/product) returned by the intrinsic.
276 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
277  const llvm::Intrinsic::ID IntrinsicID,
278  llvm::Value *X, llvm::Value *Y,
279  llvm::Value *&Carry) {
280  // Make sure we have integers of the same width.
281  assert(X->getType() == Y->getType() &&
282  "Arguments must be the same type. (Did you forget to make sure both "
283  "arguments have the same integer width?)");
284 
285  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
286  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
287  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
288  return CGF.Builder.CreateExtractValue(Tmp, 0);
289 }
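One user-visible route into this helper is clang's checked-arithmetic builtins; a minimal sketch (assumes a clang-style compiler providing __builtin_sadd_overflow):

  #include <climits>
  #include <cstdio>
  int main() {
    int sum;
    // Emits @llvm.sadd.with.overflow.i32; 'sum' receives the wrapped result.
    bool ov = __builtin_sadd_overflow(INT_MAX, 1, &sum);
    std::printf("%d %d\n", (int)ov, sum); // prints: 1 -2147483648
  }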
290 
291 namespace {
292  struct WidthAndSignedness {
293  unsigned Width;
294  bool Signed;
295  };
296 }
297 
298 static WidthAndSignedness
299 getIntegerWidthAndSignedness(const clang::ASTContext &context,
300  const clang::QualType Type) {
301  assert(Type->isIntegerType() && "Given type is not an integer.");
302  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
303  bool Signed = Type->isSignedIntegerType();
304  return {Width, Signed};
305 }
306 
307 // Given one or more integer types, this function produces an integer type that
308 // encompasses them: any value in one of the given types could be expressed in
309 // the encompassing type.
310 static struct WidthAndSignedness
311 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
312  assert(Types.size() > 0 && "Empty list of types.");
313 
314  // If any of the given types is signed, we must return a signed type.
315  bool Signed = false;
316  for (const auto &Type : Types) {
317  Signed |= Type.Signed;
318  }
319 
320  // The encompassing type must have a width greater than or equal to the width
321  // of the specified types. Additionally, if the encompassing type is signed,
322  // its width must be strictly greater than the width of any unsigned types
323  // given.
324  unsigned Width = 0;
325  for (const auto &Type : Types) {
326  unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
327  if (Width < MinWidth) {
328  Width = MinWidth;
329  }
330  }
331 
332  return {Width, Signed};
333 }
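Worked example: encompassing a signed 32-bit type and an unsigned 32-bit type yields {Width = 33, Signed = true}; the result must be signed, and a signed type needs 33 bits before it can represent every unsigned 32-bit value.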
334 
335 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
336  llvm::Type *DestType = Int8PtrTy;
337  if (ArgValue->getType() != DestType)
338  ArgValue =
339  Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
340 
341  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
342  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
343 }
344 
345 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
346 /// __builtin_object_size(p, @p To) is correct
347 static bool areBOSTypesCompatible(int From, int To) {
348  // Note: Our __builtin_object_size implementation currently treats Type=0 and
349  // Type=2 identically. Encoding this implementation detail here may make
350  // improving __builtin_object_size difficult in the future, so it's omitted.
351  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
352 }
353 
354 static llvm::Value *
355 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
356  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
357 }
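So when nothing is known about the object, types 0 and 1 (bit 1 clear) default to -1, i.e. "could be arbitrarily large", while types 2 and 3 (bit 1 set) default to 0, i.e. "no bytes are known to be there".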
358 
359 llvm::Value *
360 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
361  llvm::IntegerType *ResType) {
362  uint64_t ObjectSize;
363  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
364  return emitBuiltinObjectSize(E, Type, ResType);
365  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
366 }
367 
368 /// Returns a Value corresponding to the size of the given expression.
369 /// This Value may be either of the following:
370 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
371 /// it)
372 /// - A call to the @llvm.objectsize intrinsic
373 llvm::Value *
374 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
375  llvm::IntegerType *ResType) {
376  // We need to reference an argument if the pointer is a parameter with the
377  // pass_object_size attribute.
378  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
379  auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
380  auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
381  if (Param != nullptr && PS != nullptr &&
382  areBOSTypesCompatible(PS->getType(), Type)) {
383  auto Iter = SizeArguments.find(Param);
384  assert(Iter != SizeArguments.end());
385 
386  const ImplicitParamDecl *D = Iter->second;
387  auto DIter = LocalDeclMap.find(D);
388  assert(DIter != LocalDeclMap.end());
389 
390  return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
391  getContext().getSizeType(), E->getLocStart());
392  }
393  }
394 
395  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
396  // evaluate E for side-effects. In either case, we shouldn't lower to
397  // @llvm.objectsize.
398  if (Type == 3 || E->HasSideEffects(getContext()))
399  return getDefaultBuiltinObjectSizeResult(Type, ResType);
400 
401  // LLVM only supports 0 and 2, make sure that we pass along that
402  // as a boolean.
403  auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
404  // FIXME: Get right address space.
405  llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
406  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
407  return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
408 }
409 
410 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
411  unsigned BuiltinID, const CallExpr *E,
412  ReturnValueSlot ReturnValue) {
413  // See if we can constant fold this builtin. If so, don't emit it at all.
414  Expr::EvalResult Result;
415  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
416  !Result.hasSideEffects()) {
417  if (Result.Val.isInt())
418  return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
419  Result.Val.getInt()));
420  if (Result.Val.isFloat())
421  return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
422  Result.Val.getFloat()));
423  }
424 
425  switch (BuiltinID) {
426  default: break; // Handle intrinsics and libm functions below.
427  case Builtin::BI__builtin___CFStringMakeConstantString:
428  case Builtin::BI__builtin___NSStringMakeConstantString:
429  return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
430  case Builtin::BI__builtin_stdarg_start:
431  case Builtin::BI__builtin_va_start:
432  case Builtin::BI__va_start:
433  case Builtin::BI__builtin_va_end:
434  return RValue::get(
435  EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
436  ? EmitScalarExpr(E->getArg(0))
437  : EmitVAListRef(E->getArg(0)).getPointer(),
438  BuiltinID != Builtin::BI__builtin_va_end));
439  case Builtin::BI__builtin_va_copy: {
440  Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
441  Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
442 
443  llvm::Type *Type = Int8PtrTy;
444 
445  DstPtr = Builder.CreateBitCast(DstPtr, Type);
446  SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
447  return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
448  {DstPtr, SrcPtr}));
449  }
450  case Builtin::BI__builtin_abs:
451  case Builtin::BI__builtin_labs:
452  case Builtin::BI__builtin_llabs: {
453  Value *ArgValue = EmitScalarExpr(E->getArg(0));
454 
455  Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
456  Value *CmpResult =
457  Builder.CreateICmpSGE(ArgValue,
458  llvm::Constant::getNullValue(ArgValue->getType()),
459  "abscond");
460  Value *Result =
461  Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
462 
463  return RValue::get(Result);
464  }
465  case Builtin::BI__builtin_fabs:
466  case Builtin::BI__builtin_fabsf:
467  case Builtin::BI__builtin_fabsl: {
468  Value *Arg1 = EmitScalarExpr(E->getArg(0));
469  Value *Result = EmitFAbs(*this, Arg1);
470  return RValue::get(Result);
471  }
472  case Builtin::BI__builtin_fmod:
473  case Builtin::BI__builtin_fmodf:
474  case Builtin::BI__builtin_fmodl: {
475  Value *Arg1 = EmitScalarExpr(E->getArg(0));
476  Value *Arg2 = EmitScalarExpr(E->getArg(1));
477  Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
478  return RValue::get(Result);
479  }
480 
481  case Builtin::BI__builtin_conj:
482  case Builtin::BI__builtin_conjf:
483  case Builtin::BI__builtin_conjl: {
484  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
485  Value *Real = ComplexVal.first;
486  Value *Imag = ComplexVal.second;
487  Value *Zero =
488  Imag->getType()->isFPOrFPVectorTy()
489  ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
490  : llvm::Constant::getNullValue(Imag->getType());
491 
492  Imag = Builder.CreateFSub(Zero, Imag, "sub");
493  return RValue::getComplex(std::make_pair(Real, Imag));
494  }
495  case Builtin::BI__builtin_creal:
496  case Builtin::BI__builtin_crealf:
497  case Builtin::BI__builtin_creall:
498  case Builtin::BIcreal:
499  case Builtin::BIcrealf:
500  case Builtin::BIcreall: {
501  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
502  return RValue::get(ComplexVal.first);
503  }
504 
505  case Builtin::BI__builtin_cimag:
506  case Builtin::BI__builtin_cimagf:
507  case Builtin::BI__builtin_cimagl:
508  case Builtin::BIcimag:
509  case Builtin::BIcimagf:
510  case Builtin::BIcimagl: {
511  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
512  return RValue::get(ComplexVal.second);
513  }
514 
515  case Builtin::BI__builtin_ctzs:
516  case Builtin::BI__builtin_ctz:
517  case Builtin::BI__builtin_ctzl:
518  case Builtin::BI__builtin_ctzll: {
519  Value *ArgValue = EmitScalarExpr(E->getArg(0));
520 
521  llvm::Type *ArgType = ArgValue->getType();
522  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
523 
524  llvm::Type *ResultType = ConvertType(E->getType());
525  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
526  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
527  if (Result->getType() != ResultType)
528  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
529  "cast");
530  return RValue::get(Result);
531  }
532  case Builtin::BI__builtin_clzs:
533  case Builtin::BI__builtin_clz:
534  case Builtin::BI__builtin_clzl:
535  case Builtin::BI__builtin_clzll: {
536  Value *ArgValue = EmitScalarExpr(E->getArg(0));
537 
538  llvm::Type *ArgType = ArgValue->getType();
539  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
540 
541  llvm::Type *ResultType = ConvertType(E->getType());
542  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
543  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
544  if (Result->getType() != ResultType)
545  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
546  "cast");
547  return RValue::get(Result);
548  }
549  case Builtin::BI__builtin_ffs:
550  case Builtin::BI__builtin_ffsl:
551  case Builtin::BI__builtin_ffsll: {
552  // ffs(x) -> x ? cttz(x) + 1 : 0
553  Value *ArgValue = EmitScalarExpr(E->getArg(0));
554 
555  llvm::Type *ArgType = ArgValue->getType();
556  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
557 
558  llvm::Type *ResultType = ConvertType(E->getType());
559  Value *Tmp =
560  Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
561  llvm::ConstantInt::get(ArgType, 1));
562  Value *Zero = llvm::Constant::getNullValue(ArgType);
563  Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
564  Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
565  if (Result->getType() != ResultType)
566  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
567  "cast");
568  return RValue::get(Result);
569  }
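Concretely: __builtin_ffs(8) is 4, since cttz(8) == 3 and the helper adds 1, while __builtin_ffs(0) is 0 via the select on 'iszero' above.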
570  case Builtin::BI__builtin_parity:
571  case Builtin::BI__builtin_parityl:
572  case Builtin::BI__builtin_parityll: {
573  // parity(x) -> ctpop(x) & 1
574  Value *ArgValue = EmitScalarExpr(E->getArg(0));
575 
576  llvm::Type *ArgType = ArgValue->getType();
577  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
578 
579  llvm::Type *ResultType = ConvertType(E->getType());
580  Value *Tmp = Builder.CreateCall(F, ArgValue);
581  Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
582  if (Result->getType() != ResultType)
583  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
584  "cast");
585  return RValue::get(Result);
586  }
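For instance, __builtin_parity(7) is 1 (three set bits) and __builtin_parity(3) is 0 (two set bits).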
587  case Builtin::BI__builtin_popcount:
588  case Builtin::BI__builtin_popcountl:
589  case Builtin::BI__builtin_popcountll: {
590  Value *ArgValue = EmitScalarExpr(E->getArg(0));
591 
592  llvm::Type *ArgType = ArgValue->getType();
593  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
594 
595  llvm::Type *ResultType = ConvertType(E->getType());
596  Value *Result = Builder.CreateCall(F, ArgValue);
597  if (Result->getType() != ResultType)
598  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
599  "cast");
600  return RValue::get(Result);
601  }
602  case Builtin::BI__builtin_unpredictable: {
603  // Always return the argument of __builtin_unpredictable. LLVM does not
604  // handle this builtin. Metadata for this builtin should be added directly
605  // to instructions such as branches or switches that use it.
606  return RValue::get(EmitScalarExpr(E->getArg(0)));
607  }
608  case Builtin::BI__builtin_expect: {
609  Value *ArgValue = EmitScalarExpr(E->getArg(0));
610  llvm::Type *ArgType = ArgValue->getType();
611 
612  Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
613  // Don't generate llvm.expect on -O0 as the backend won't use it for
614  // anything.
615  // Note, we still IRGen ExpectedValue because it could have side-effects.
616  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
617  return RValue::get(ArgValue);
618 
619  Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
620  Value *Result =
621  Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
622  return RValue::get(Result);
623  }
624  case Builtin::BI__builtin_assume_aligned: {
625  Value *PtrValue = EmitScalarExpr(E->getArg(0));
626  Value *OffsetValue =
627  (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
628 
629  Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
630  ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
631  unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
632 
633  EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
634  return RValue::get(PtrValue);
635  }
636  case Builtin::BI__assume:
637  case Builtin::BI__builtin_assume: {
638  if (E->getArg(0)->HasSideEffects(getContext()))
639  return RValue::get(nullptr);
640 
641  Value *ArgValue = EmitScalarExpr(E->getArg(0));
642  Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
643  return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
644  }
645  case Builtin::BI__builtin_bswap16:
646  case Builtin::BI__builtin_bswap32:
647  case Builtin::BI__builtin_bswap64: {
648  Value *ArgValue = EmitScalarExpr(E->getArg(0));
649  llvm::Type *ArgType = ArgValue->getType();
650  Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
651  return RValue::get(Builder.CreateCall(F, ArgValue));
652  }
653  case Builtin::BI__builtin_object_size: {
654  unsigned Type =
655  E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
656  auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
657 
658  // We pass this builtin onto the optimizer so that it can figure out the
659  // object size in more complex cases.
660  return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
661  }
662  case Builtin::BI__builtin_prefetch: {
663  Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
664  // FIXME: Technically these constants should be of type 'int', yes?
665  RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
666  llvm::ConstantInt::get(Int32Ty, 0);
667  Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
668  llvm::ConstantInt::get(Int32Ty, 3);
669  Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
670  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
671  return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
672  }
673  case Builtin::BI__builtin_readcyclecounter: {
674  Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
675  return RValue::get(Builder.CreateCall(F));
676  }
677  case Builtin::BI__builtin___clear_cache: {
678  Value *Begin = EmitScalarExpr(E->getArg(0));
679  Value *End = EmitScalarExpr(E->getArg(1));
680  Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
681  return RValue::get(Builder.CreateCall(F, {Begin, End}));
682  }
683  case Builtin::BI__builtin_trap:
684  return RValue::get(EmitTrapCall(Intrinsic::trap));
685  case Builtin::BI__debugbreak:
686  return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
687  case Builtin::BI__builtin_unreachable: {
688  if (SanOpts.has(SanitizerKind::Unreachable)) {
689  SanitizerScope SanScope(this);
690  EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
691  SanitizerKind::Unreachable),
692  "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
693  None);
694  } else
695  Builder.CreateUnreachable();
696 
697  // We do need to preserve an insertion point.
698  EmitBlock(createBasicBlock("unreachable.cont"));
699 
700  return RValue::get(nullptr);
701  }
702 
703  case Builtin::BI__builtin_powi:
704  case Builtin::BI__builtin_powif:
705  case Builtin::BI__builtin_powil: {
706  Value *Base = EmitScalarExpr(E->getArg(0));
707  Value *Exponent = EmitScalarExpr(E->getArg(1));
708  llvm::Type *ArgType = Base->getType();
709  Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
710  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
711  }
712 
713  case Builtin::BI__builtin_isgreater:
714  case Builtin::BI__builtin_isgreaterequal:
715  case Builtin::BI__builtin_isless:
716  case Builtin::BI__builtin_islessequal:
717  case Builtin::BI__builtin_islessgreater:
718  case Builtin::BI__builtin_isunordered: {
719  // Ordered comparisons: we know the arguments to these are matching scalar
720  // floating point values.
721  Value *LHS = EmitScalarExpr(E->getArg(0));
722  Value *RHS = EmitScalarExpr(E->getArg(1));
723 
724  switch (BuiltinID) {
725  default: llvm_unreachable("Unknown ordered comparison");
726  case Builtin::BI__builtin_isgreater:
727  LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
728  break;
729  case Builtin::BI__builtin_isgreaterequal:
730  LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
731  break;
732  case Builtin::BI__builtin_isless:
733  LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
734  break;
735  case Builtin::BI__builtin_islessequal:
736  LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
737  break;
738  case Builtin::BI__builtin_islessgreater:
739  LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
740  break;
741  case Builtin::BI__builtin_isunordered:
742  LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
743  break;
744  }
745  // ZExt bool to int type.
746  return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
747  }
748  case Builtin::BI__builtin_isnan: {
749  Value *V = EmitScalarExpr(E->getArg(0));
750  V = Builder.CreateFCmpUNO(V, V, "cmp");
751  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
752  }
753 
754  case Builtin::BI__builtin_isinf: {
755  // isinf(x) --> fabs(x) == infinity
756  Value *V = EmitScalarExpr(E->getArg(0));
757  V = EmitFAbs(*this, V);
758 
759  V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
760  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
761  }
762 
763  case Builtin::BI__builtin_isinf_sign: {
764  // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
765  Value *Arg = EmitScalarExpr(E->getArg(0));
766  Value *AbsArg = EmitFAbs(*this, Arg);
767  Value *IsInf = Builder.CreateFCmpOEQ(
768  AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
769  Value *IsNeg = EmitSignBit(*this, Arg);
770 
771  llvm::Type *IntTy = ConvertType(E->getType());
772  Value *Zero = Constant::getNullValue(IntTy);
773  Value *One = ConstantInt::get(IntTy, 1);
774  Value *NegativeOne = ConstantInt::get(IntTy, -1);
775  Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
776  Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
777  return RValue::get(Result);
778  }
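In other words: isinf_sign(-INFINITY) is -1, isinf_sign(+INFINITY) is 1, and any finite or NaN input yields 0.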
779 
780  case Builtin::BI__builtin_isnormal: {
781  // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
782  Value *V = EmitScalarExpr(E->getArg(0));
783  Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
784 
785  Value *Abs = EmitFAbs(*this, V);
786  Value *IsLessThanInf =
787  Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
788  APFloat Smallest = APFloat::getSmallestNormalized(
789  getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
790  Value *IsNormal =
791  Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
792  "isnormal");
793  V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
794  V = Builder.CreateAnd(V, IsNormal, "and");
795  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
796  }
797 
798  case Builtin::BI__builtin_isfinite: {
799  // isfinite(x) --> x == x && fabs(x) != infinity;
800  Value *V = EmitScalarExpr(E->getArg(0));
801  Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
802 
803  Value *Abs = EmitFAbs(*this, V);
804  Value *IsNotInf =
805  Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
806 
807  V = Builder.CreateAnd(Eq, IsNotInf, "and");
808  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
809  }
810 
811  case Builtin::BI__builtin_fpclassify: {
812  Value *V = EmitScalarExpr(E->getArg(5));
813  llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
814 
815  // Create Result
816  BasicBlock *Begin = Builder.GetInsertBlock();
817  BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
818  Builder.SetInsertPoint(End);
819  PHINode *Result =
820  Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
821  "fpclassify_result");
822 
823  // if (V==0) return FP_ZERO
824  Builder.SetInsertPoint(Begin);
825  Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
826  "iszero");
827  Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
828  BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
829  Builder.CreateCondBr(IsZero, End, NotZero);
830  Result->addIncoming(ZeroLiteral, Begin);
831 
832  // if (V != V) return FP_NAN
833  Builder.SetInsertPoint(NotZero);
834  Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
835  Value *NanLiteral = EmitScalarExpr(E->getArg(0));
836  BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
837  Builder.CreateCondBr(IsNan, End, NotNan);
838  Result->addIncoming(NanLiteral, NotZero);
839 
840  // if (fabs(V) == infinity) return FP_INFINITY
841  Builder.SetInsertPoint(NotNan);
842  Value *VAbs = EmitFAbs(*this, V);
843  Value *IsInf =
844  Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
845  "isinf");
846  Value *InfLiteral = EmitScalarExpr(E->getArg(1));
847  BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
848  Builder.CreateCondBr(IsInf, End, NotInf);
849  Result->addIncoming(InfLiteral, NotNan);
850 
851  // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
852  Builder.SetInsertPoint(NotInf);
853  APFloat Smallest = APFloat::getSmallestNormalized(
854  getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
855  Value *IsNormal =
856  Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
857  "isnormal");
858  Value *NormalResult =
859  Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
860  EmitScalarExpr(E->getArg(3)));
861  Builder.CreateBr(End);
862  Result->addIncoming(NormalResult, NotInf);
863 
864  // return Result
865  Builder.SetInsertPoint(End);
866  return RValue::get(Result);
867  }
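Usage sketch (assumes the FP_* classification macros from <cmath>; note the argument order matches the Arg(0)..Arg(5) uses above):

  #include <cmath>
  #include <cstdio>
  int main() {
    int k = __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL,
                                 FP_SUBNORMAL, FP_ZERO, 1.0);
    std::printf("%d\n", k == FP_NORMAL); // prints: 1
  }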
868 
869  case Builtin::BIalloca:
870  case Builtin::BI_alloca:
871  case Builtin::BI__builtin_alloca: {
872  Value *Size = EmitScalarExpr(E->getArg(0));
873  return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
874  }
875  case Builtin::BIbzero:
876  case Builtin::BI__builtin_bzero: {
877  Address Dest = EmitPointerWithAlignment(E->getArg(0));
878  Value *SizeVal = EmitScalarExpr(E->getArg(1));
879  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
880  E->getArg(0)->getExprLoc(), FD, 0);
881  Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
882  return RValue::get(Dest.getPointer());
883  }
884  case Builtin::BImemcpy:
885  case Builtin::BI__builtin_memcpy: {
886  Address Dest = EmitPointerWithAlignment(E->getArg(0));
887  Address Src = EmitPointerWithAlignment(E->getArg(1));
888  Value *SizeVal = EmitScalarExpr(E->getArg(2));
889  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
890  E->getArg(0)->getExprLoc(), FD, 0);
891  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
892  E->getArg(1)->getExprLoc(), FD, 1);
893  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
894  return RValue::get(Dest.getPointer());
895  }
896 
897  case Builtin::BI__builtin___memcpy_chk: {
898  // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
899  llvm::APSInt Size, DstSize;
900  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
901  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
902  break;
903  if (Size.ugt(DstSize))
904  break;
905  Address Dest = EmitPointerWithAlignment(E->getArg(0));
906  Address Src = EmitPointerWithAlignment(E->getArg(1));
907  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
908  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
909  return RValue::get(Dest.getPointer());
910  }
911 
912  case Builtin::BI__builtin_objc_memmove_collectable: {
913  Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
914  Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
915  Value *SizeVal = EmitScalarExpr(E->getArg(2));
916  CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
917  DestAddr, SrcAddr, SizeVal);
918  return RValue::get(DestAddr.getPointer());
919  }
920 
921  case Builtin::BI__builtin___memmove_chk: {
922  // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
923  llvm::APSInt Size, DstSize;
924  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
925  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
926  break;
927  if (Size.ugt(DstSize))
928  break;
929  Address Dest = EmitPointerWithAlignment(E->getArg(0));
930  Address Src = EmitPointerWithAlignment(E->getArg(1));
931  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
932  Builder.CreateMemMove(Dest, Src, SizeVal, false);
933  return RValue::get(Dest.getPointer());
934  }
935 
936  case Builtin::BImemmove:
937  case Builtin::BI__builtin_memmove: {
938  Address Dest = EmitPointerWithAlignment(E->getArg(0));
939  Address Src = EmitPointerWithAlignment(E->getArg(1));
940  Value *SizeVal = EmitScalarExpr(E->getArg(2));
941  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
942  E->getArg(0)->getExprLoc(), FD, 0);
943  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
944  E->getArg(1)->getExprLoc(), FD, 1);
945  Builder.CreateMemMove(Dest, Src, SizeVal, false);
946  return RValue::get(Dest.getPointer());
947  }
948  case Builtin::BImemset:
949  case Builtin::BI__builtin_memset: {
950  Address Dest = EmitPointerWithAlignment(E->getArg(0));
951  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
952  Builder.getInt8Ty());
953  Value *SizeVal = EmitScalarExpr(E->getArg(2));
954  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
955  E->getArg(0)->getExprLoc(), FD, 0);
956  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
957  return RValue::get(Dest.getPointer());
958  }
959  case Builtin::BI__builtin___memset_chk: {
960  // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
961  llvm::APSInt Size, DstSize;
962  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
963  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
964  break;
965  if (Size.ugt(DstSize))
966  break;
967  Address Dest = EmitPointerWithAlignment(E->getArg(0));
968  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
969  Builder.getInt8Ty());
970  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
971  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
972  return RValue::get(Dest.getPointer());
973  }
974  case Builtin::BI__builtin_dwarf_cfa: {
975  // The offset in bytes from the first argument to the CFA.
976  //
977  // Why on earth is this in the frontend? Is there any reason at
978  // all that the backend can't reasonably determine this while
979  // lowering llvm.eh.dwarf.cfa()?
980  //
981  // TODO: If there's a satisfactory reason, add a target hook for
982  // this instead of hard-coding 0, which is correct for most targets.
983  int32_t Offset = 0;
984 
985  Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
986  return RValue::get(Builder.CreateCall(F,
987  llvm::ConstantInt::get(Int32Ty, Offset)));
988  }
989  case Builtin::BI__builtin_return_address: {
990  Value *Depth =
991  CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
992  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
993  return RValue::get(Builder.CreateCall(F, Depth));
994  }
995  case Builtin::BI__builtin_frame_address: {
996  Value *Depth =
997  CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
998  Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
999  return RValue::get(Builder.CreateCall(F, Depth));
1000  }
1001  case Builtin::BI__builtin_extract_return_addr: {
1002  Value *Address = EmitScalarExpr(E->getArg(0));
1003  Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1004  return RValue::get(Result);
1005  }
1006  case Builtin::BI__builtin_frob_return_addr: {
1007  Value *Address = EmitScalarExpr(E->getArg(0));
1008  Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1009  return RValue::get(Result);
1010  }
1011  case Builtin::BI__builtin_dwarf_sp_column: {
1012  llvm::IntegerType *Ty
1013  = cast<llvm::IntegerType>(ConvertType(E->getType()));
1014  int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1015  if (Column == -1) {
1016  CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1017  return RValue::get(llvm::UndefValue::get(Ty));
1018  }
1019  return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1020  }
1021  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1022  Value *Address = EmitScalarExpr(E->getArg(0));
1023  if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1024  CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1025  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1026  }
1027  case Builtin::BI__builtin_eh_return: {
1028  Value *Int = EmitScalarExpr(E->getArg(0));
1029  Value *Ptr = EmitScalarExpr(E->getArg(1));
1030 
1031  llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1032  assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1033  "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1034  Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1035  ? Intrinsic::eh_return_i32
1036  : Intrinsic::eh_return_i64);
1037  Builder.CreateCall(F, {Int, Ptr});
1038  Builder.CreateUnreachable();
1039 
1040  // We do need to preserve an insertion point.
1041  EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1042 
1043  return RValue::get(nullptr);
1044  }
1045  case Builtin::BI__builtin_unwind_init: {
1046  Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1047  return RValue::get(Builder.CreateCall(F));
1048  }
1049  case Builtin::BI__builtin_extend_pointer: {
1050  // Extends a pointer to the size of an _Unwind_Word, which is
1051  // uint64_t on all platforms. Generally this gets poked into a
1052  // register and eventually used as an address, so if the
1053  // addressing registers are wider than pointers and the platform
1054  // doesn't implicitly ignore high-order bits when doing
1055  // addressing, we need to make sure we zext / sext based on
1056  // the platform's expectations.
1057  //
1058  // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1059 
1060  // Cast the pointer to intptr_t.
1061  Value *Ptr = EmitScalarExpr(E->getArg(0));
1062  Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1063 
1064  // If that's 64 bits, we're done.
1065  if (IntPtrTy->getBitWidth() == 64)
1066  return RValue::get(Result);
1067 
1068  // Otherwise, ask the codegen data what to do.
1069  if (getTargetHooks().extendPointerWithSExt())
1070  return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1071  else
1072  return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1073  }
1074  case Builtin::BI__builtin_setjmp: {
1075  // Buffer is a void**.
1076  Address Buf = EmitPointerWithAlignment(E->getArg(0));
1077 
1078  // Store the frame pointer to the setjmp buffer.
1079  Value *FrameAddr =
1080  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1081  ConstantInt::get(Int32Ty, 0));
1082  Builder.CreateStore(FrameAddr, Buf);
1083 
1084  // Store the stack pointer to the setjmp buffer.
1085  Value *StackAddr =
1086  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1087  Address StackSaveSlot =
1088  Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1089  Builder.CreateStore(StackAddr, StackSaveSlot);
1090 
1091  // Call LLVM's EH setjmp, which is lightweight.
1092  Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1093  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1094  return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1095  }
1096  case Builtin::BI__builtin_longjmp: {
1097  Value *Buf = EmitScalarExpr(E->getArg(0));
1098  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1099 
1100  // Call LLVM's EH longjmp, which is lightweight.
1101  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1102 
1103  // longjmp doesn't return; mark this as unreachable.
1104  Builder.CreateUnreachable();
1105 
1106  // We do need to preserve an insertion point.
1107  EmitBlock(createBasicBlock("longjmp.cont"));
1108 
1109  return RValue::get(nullptr);
1110  }
1111  case Builtin::BI__sync_fetch_and_add:
1112  case Builtin::BI__sync_fetch_and_sub:
1113  case Builtin::BI__sync_fetch_and_or:
1114  case Builtin::BI__sync_fetch_and_and:
1115  case Builtin::BI__sync_fetch_and_xor:
1116  case Builtin::BI__sync_fetch_and_nand:
1117  case Builtin::BI__sync_add_and_fetch:
1118  case Builtin::BI__sync_sub_and_fetch:
1119  case Builtin::BI__sync_and_and_fetch:
1120  case Builtin::BI__sync_or_and_fetch:
1121  case Builtin::BI__sync_xor_and_fetch:
1122  case Builtin::BI__sync_nand_and_fetch:
1123  case Builtin::BI__sync_val_compare_and_swap:
1124  case Builtin::BI__sync_bool_compare_and_swap:
1125  case Builtin::BI__sync_lock_test_and_set:
1126  case Builtin::BI__sync_lock_release:
1127  case Builtin::BI__sync_swap:
1128  llvm_unreachable("Shouldn't make it through sema");
1129  case Builtin::BI__sync_fetch_and_add_1:
1130  case Builtin::BI__sync_fetch_and_add_2:
1131  case Builtin::BI__sync_fetch_and_add_4:
1132  case Builtin::BI__sync_fetch_and_add_8:
1133  case Builtin::BI__sync_fetch_and_add_16:
1134  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1135  case Builtin::BI__sync_fetch_and_sub_1:
1136  case Builtin::BI__sync_fetch_and_sub_2:
1137  case Builtin::BI__sync_fetch_and_sub_4:
1138  case Builtin::BI__sync_fetch_and_sub_8:
1139  case Builtin::BI__sync_fetch_and_sub_16:
1140  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1141  case Builtin::BI__sync_fetch_and_or_1:
1142  case Builtin::BI__sync_fetch_and_or_2:
1143  case Builtin::BI__sync_fetch_and_or_4:
1144  case Builtin::BI__sync_fetch_and_or_8:
1145  case Builtin::BI__sync_fetch_and_or_16:
1146  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1147  case Builtin::BI__sync_fetch_and_and_1:
1148  case Builtin::BI__sync_fetch_and_and_2:
1149  case Builtin::BI__sync_fetch_and_and_4:
1150  case Builtin::BI__sync_fetch_and_and_8:
1151  case Builtin::BI__sync_fetch_and_and_16:
1152  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1153  case Builtin::BI__sync_fetch_and_xor_1:
1154  case Builtin::BI__sync_fetch_and_xor_2:
1155  case Builtin::BI__sync_fetch_and_xor_4:
1156  case Builtin::BI__sync_fetch_and_xor_8:
1157  case Builtin::BI__sync_fetch_and_xor_16:
1158  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1159  case Builtin::BI__sync_fetch_and_nand_1:
1160  case Builtin::BI__sync_fetch_and_nand_2:
1161  case Builtin::BI__sync_fetch_and_nand_4:
1162  case Builtin::BI__sync_fetch_and_nand_8:
1163  case Builtin::BI__sync_fetch_and_nand_16:
1164  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1165 
1166  // Clang extensions: not overloaded yet.
1167  case Builtin::BI__sync_fetch_and_min:
1168  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1169  case Builtin::BI__sync_fetch_and_max:
1170  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1171  case Builtin::BI__sync_fetch_and_umin:
1172  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1173  case Builtin::BI__sync_fetch_and_umax:
1174  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1175 
1176  case Builtin::BI__sync_add_and_fetch_1:
1177  case Builtin::BI__sync_add_and_fetch_2:
1178  case Builtin::BI__sync_add_and_fetch_4:
1179  case Builtin::BI__sync_add_and_fetch_8:
1180  case Builtin::BI__sync_add_and_fetch_16:
1181  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1182  llvm::Instruction::Add);
1183  case Builtin::BI__sync_sub_and_fetch_1:
1184  case Builtin::BI__sync_sub_and_fetch_2:
1185  case Builtin::BI__sync_sub_and_fetch_4:
1186  case Builtin::BI__sync_sub_and_fetch_8:
1187  case Builtin::BI__sync_sub_and_fetch_16:
1188  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1189  llvm::Instruction::Sub);
1190  case Builtin::BI__sync_and_and_fetch_1:
1191  case Builtin::BI__sync_and_and_fetch_2:
1192  case Builtin::BI__sync_and_and_fetch_4:
1193  case Builtin::BI__sync_and_and_fetch_8:
1194  case Builtin::BI__sync_and_and_fetch_16:
1195  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1196  llvm::Instruction::And);
1197  case Builtin::BI__sync_or_and_fetch_1:
1198  case Builtin::BI__sync_or_and_fetch_2:
1199  case Builtin::BI__sync_or_and_fetch_4:
1200  case Builtin::BI__sync_or_and_fetch_8:
1201  case Builtin::BI__sync_or_and_fetch_16:
1202  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1203  llvm::Instruction::Or);
1204  case Builtin::BI__sync_xor_and_fetch_1:
1205  case Builtin::BI__sync_xor_and_fetch_2:
1206  case Builtin::BI__sync_xor_and_fetch_4:
1207  case Builtin::BI__sync_xor_and_fetch_8:
1208  case Builtin::BI__sync_xor_and_fetch_16:
1209  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1210  llvm::Instruction::Xor);
1211  case Builtin::BI__sync_nand_and_fetch_1:
1212  case Builtin::BI__sync_nand_and_fetch_2:
1213  case Builtin::BI__sync_nand_and_fetch_4:
1214  case Builtin::BI__sync_nand_and_fetch_8:
1215  case Builtin::BI__sync_nand_and_fetch_16:
1216  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1217  llvm::Instruction::And, true);
1218 
1219  case Builtin::BI__sync_val_compare_and_swap_1:
1220  case Builtin::BI__sync_val_compare_and_swap_2:
1221  case Builtin::BI__sync_val_compare_and_swap_4:
1222  case Builtin::BI__sync_val_compare_and_swap_8:
1223  case Builtin::BI__sync_val_compare_and_swap_16:
1224  return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1225 
1226  case Builtin::BI__sync_bool_compare_and_swap_1:
1227  case Builtin::BI__sync_bool_compare_and_swap_2:
1228  case Builtin::BI__sync_bool_compare_and_swap_4:
1229  case Builtin::BI__sync_bool_compare_and_swap_8:
1230  case Builtin::BI__sync_bool_compare_and_swap_16:
1231  return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1232 
1233  case Builtin::BI__sync_swap_1:
1234  case Builtin::BI__sync_swap_2:
1235  case Builtin::BI__sync_swap_4:
1236  case Builtin::BI__sync_swap_8:
1237  case Builtin::BI__sync_swap_16:
1238  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1239 
1240  case Builtin::BI__sync_lock_test_and_set_1:
1241  case Builtin::BI__sync_lock_test_and_set_2:
1242  case Builtin::BI__sync_lock_test_and_set_4:
1243  case Builtin::BI__sync_lock_test_and_set_8:
1244  case Builtin::BI__sync_lock_test_and_set_16:
1245  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1246 
1247  case Builtin::BI__sync_lock_release_1:
1248  case Builtin::BI__sync_lock_release_2:
1249  case Builtin::BI__sync_lock_release_4:
1250  case Builtin::BI__sync_lock_release_8:
1251  case Builtin::BI__sync_lock_release_16: {
1252  Value *Ptr = EmitScalarExpr(E->getArg(0));
1253  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1254  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1255  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1256  StoreSize.getQuantity() * 8);
1257  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1258  llvm::StoreInst *Store =
1259  Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1260  StoreSize);
1261  Store->setAtomic(llvm::Release);
1262  return RValue::get(nullptr);
1263  }
1264 
1265  case Builtin::BI__sync_synchronize: {
1266  // We assume this is supposed to correspond to a C++0x-style
1267  // sequentially-consistent fence (i.e. this is only usable for
1268  // synchronization, not device I/O or anything like that). This intrinsic
1269  // is really badly designed in the sense that in theory, there isn't
1270  // any way to safely use it... but in practice, it mostly works
1271  // to use it with non-atomic loads and stores to get acquire/release
1272  // semantics.
1273  Builder.CreateFence(llvm::SequentiallyConsistent);
1274  return RValue::get(nullptr);
1275  }
1276 
1277  case Builtin::BI__builtin_nontemporal_load:
1278  return RValue::get(EmitNontemporalLoad(*this, E));
1279  case Builtin::BI__builtin_nontemporal_store:
1280  return RValue::get(EmitNontemporalStore(*this, E));
1281  case Builtin::BI__c11_atomic_is_lock_free:
1282  case Builtin::BI__atomic_is_lock_free: {
1283  // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1284  // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1285  // _Atomic(T) is always properly-aligned.
1286  const char *LibCallName = "__atomic_is_lock_free";
1287  CallArgList Args;
1288  Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1289  getContext().getSizeType());
1290  if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1291  Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1292  getContext().VoidPtrTy);
1293  else
1294  Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1295  getContext().VoidPtrTy);
1296  const CGFunctionInfo &FuncInfo =
1297  CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
1298  FunctionType::ExtInfo(),
1299  RequiredArgs::All);
1300  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1301  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1302  return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1303  }
1304 
1305  case Builtin::BI__atomic_test_and_set: {
1306  // Look at the argument type to determine whether this is a volatile
1307  // operation. The parameter type is always volatile.
1308  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1309  bool Volatile =
1310  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1311 
1312  Value *Ptr = EmitScalarExpr(E->getArg(0));
1313  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1314  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1315  Value *NewVal = Builder.getInt8(1);
1316  Value *Order = EmitScalarExpr(E->getArg(1));
1317  if (isa<llvm::ConstantInt>(Order)) {
1318  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1319  AtomicRMWInst *Result = nullptr;
1320  switch (ord) {
1321  case 0: // memory_order_relaxed
1322  default: // invalid order
1323  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1324  Ptr, NewVal,
1325  llvm::Monotonic);
1326  break;
1327  case 1: // memory_order_consume
1328  case 2: // memory_order_acquire
1329  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1330  Ptr, NewVal,
1331  llvm::Acquire);
1332  break;
1333  case 3: // memory_order_release
1334  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1335  Ptr, NewVal,
1336  llvm::Release);
1337  break;
1338  case 4: // memory_order_acq_rel
1339  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1340  Ptr, NewVal,
1341  llvm::AcquireRelease);
1342  break;
1343  case 5: // memory_order_seq_cst
1344  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1345  Ptr, NewVal,
1346  llvm::SequentiallyConsistent);
1347  break;
1348  }
1349  Result->setVolatile(Volatile);
1350  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1351  }
1352 
1353  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1354 
1355  llvm::BasicBlock *BBs[5] = {
1356  createBasicBlock("monotonic", CurFn),
1357  createBasicBlock("acquire", CurFn),
1358  createBasicBlock("release", CurFn),
1359  createBasicBlock("acqrel", CurFn),
1360  createBasicBlock("seqcst", CurFn)
1361  };
1362  llvm::AtomicOrdering Orders[5] = {
1363  llvm::Monotonic, llvm::Acquire, llvm::Release,
1364  llvm::AcquireRelease, llvm::SequentiallyConsistent
1365  };
1366 
1367  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1368  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1369 
1370  Builder.SetInsertPoint(ContBB);
1371  PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1372 
1373  for (unsigned i = 0; i < 5; ++i) {
1374  Builder.SetInsertPoint(BBs[i]);
1375  AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1376  Ptr, NewVal, Orders[i]);
1377  RMW->setVolatile(Volatile);
1378  Result->addIncoming(RMW, BBs[i]);
1379  Builder.CreateBr(ContBB);
1380  }
1381 
1382  SI->addCase(Builder.getInt32(0), BBs[0]);
1383  SI->addCase(Builder.getInt32(1), BBs[1]);
1384  SI->addCase(Builder.getInt32(2), BBs[1]);
1385  SI->addCase(Builder.getInt32(3), BBs[2]);
1386  SI->addCase(Builder.getInt32(4), BBs[3]);
1387  SI->addCase(Builder.getInt32(5), BBs[4]);
1388 
1389  Builder.SetInsertPoint(ContBB);
1390  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1391  }
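Caller-side sketch (names ours): bool was_set = __atomic_test_and_set(&flag, __ATOMIC_ACQUIRE); the order value 2 selects the Acquire exchange above, and the returned byte is tested against zero by CreateIsNotNull.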
1392 
1393  case Builtin::BI__atomic_clear: {
1394  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1395  bool Volatile =
1396  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1397 
1398  Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1399  unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1400  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1401  Value *NewVal = Builder.getInt8(0);
1402  Value *Order = EmitScalarExpr(E->getArg(1));
1403  if (isa<llvm::ConstantInt>(Order)) {
1404  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1405  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1406  switch (ord) {
1407  case 0: // memory_order_relaxed
1408  default: // invalid order
1409  Store->setOrdering(llvm::Monotonic);
1410  break;
1411  case 3: // memory_order_release
1412  Store->setOrdering(llvm::Release);
1413  break;
1414  case 5: // memory_order_seq_cst
1415  Store->setOrdering(llvm::SequentiallyConsistent);
1416  break;
1417  }
1418  return RValue::get(nullptr);
1419  }
1420 
1421  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1422 
1423  llvm::BasicBlock *BBs[3] = {
1424  createBasicBlock("monotonic", CurFn),
1425  createBasicBlock("release", CurFn),
1426  createBasicBlock("seqcst", CurFn)
1427  };
1428  llvm::AtomicOrdering Orders[3] = {
1429  llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
1430  };
1431 
1432  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1433  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1434 
1435  for (unsigned i = 0; i < 3; ++i) {
1436  Builder.SetInsertPoint(BBs[i]);
1437  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1438  Store->setOrdering(Orders[i]);
1439  Builder.CreateBr(ContBB);
1440  }
1441 
1442  SI->addCase(Builder.getInt32(0), BBs[0]);
1443  SI->addCase(Builder.getInt32(3), BBs[1]);
1444  SI->addCase(Builder.getInt32(5), BBs[2]);
1445 
1446  Builder.SetInsertPoint(ContBB);
1447  return RValue::get(nullptr);
1448  }
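 
  // Illustrative note (not in the original source): __atomic_clear is just an
  // atomic store of zero, so only the relaxed, release, and seq_cst orderings
  // are meaningful here. For example,
  //
  //   __atomic_clear(&flag, __ATOMIC_RELEASE);
  //
  // lowers to roughly:  store atomic i8 0, i8* %flag release, align 1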
1449 
1450  case Builtin::BI__atomic_thread_fence:
1451  case Builtin::BI__atomic_signal_fence:
1452  case Builtin::BI__c11_atomic_thread_fence:
1453  case Builtin::BI__c11_atomic_signal_fence: {
1454  llvm::SynchronizationScope Scope;
1455  if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1456  BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1457  Scope = llvm::SingleThread;
1458  else
1459  Scope = llvm::CrossThread;
1460  Value *Order = EmitScalarExpr(E->getArg(0));
1461  if (isa<llvm::ConstantInt>(Order)) {
1462  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1463  switch (ord) {
1464  case 0: // memory_order_relaxed
1465  default: // invalid order
1466  break;
1467  case 1: // memory_order_consume
1468  case 2: // memory_order_acquire
1469  Builder.CreateFence(llvm::Acquire, Scope);
1470  break;
1471  case 3: // memory_order_release
1472  Builder.CreateFence(llvm::Release, Scope);
1473  break;
1474  case 4: // memory_order_acq_rel
1475  Builder.CreateFence(llvm::AcquireRelease, Scope);
1476  break;
1477  case 5: // memory_order_seq_cst
1478  Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1479  break;
1480  }
1481  return RValue::get(nullptr);
1482  }
1483 
1484  llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1485  AcquireBB = createBasicBlock("acquire", CurFn);
1486  ReleaseBB = createBasicBlock("release", CurFn);
1487  AcqRelBB = createBasicBlock("acqrel", CurFn);
1488  SeqCstBB = createBasicBlock("seqcst", CurFn);
1489  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1490 
1491  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1492  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1493 
1494  Builder.SetInsertPoint(AcquireBB);
1495  Builder.CreateFence(llvm::Acquire, Scope);
1496  Builder.CreateBr(ContBB);
1497  SI->addCase(Builder.getInt32(1), AcquireBB);
1498  SI->addCase(Builder.getInt32(2), AcquireBB);
1499 
1500  Builder.SetInsertPoint(ReleaseBB);
1501  Builder.CreateFence(llvm::Release, Scope);
1502  Builder.CreateBr(ContBB);
1503  SI->addCase(Builder.getInt32(3), ReleaseBB);
1504 
1505  Builder.SetInsertPoint(AcqRelBB);
1506  Builder.CreateFence(llvm::AcquireRelease, Scope);
1507  Builder.CreateBr(ContBB);
1508  SI->addCase(Builder.getInt32(4), AcqRelBB);
1509 
1510  Builder.SetInsertPoint(SeqCstBB);
1511  Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
1512  Builder.CreateBr(ContBB);
1513  SI->addCase(Builder.getInt32(5), SeqCstBB);
1514 
1515  Builder.SetInsertPoint(ContBB);
1516  return RValue::get(nullptr);
1517  }
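 
  // Illustrative note (not in the original source):
  //   __atomic_thread_fence(__ATOMIC_SEQ_CST)  ->  fence seq_cst
  //   __atomic_signal_fence(__ATOMIC_ACQUIRE)  ->  fence singlethread acquire
  // A relaxed fence emits no instruction at all, matching the constant-order
  // switch above.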
1518 
1519  // Library functions with special handling.
1520  case Builtin::BIsqrt:
1521  case Builtin::BIsqrtf:
1522  case Builtin::BIsqrtl: {
1523  // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1524  // in finite- or unsafe-math mode (the intrinsic has different semantics
1525  // for handling negative numbers compared to the library function, so
1526  // -fmath-errno=0 is not enough).
1527  if (!FD->hasAttr<ConstAttr>())
1528  break;
1529  if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1530  CGM.getCodeGenOpts().NoNaNsFPMath))
1531  break;
1532  Value *Arg0 = EmitScalarExpr(E->getArg(0));
1533  llvm::Type *ArgType = Arg0->getType();
1534  Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1535  return RValue::get(Builder.CreateCall(F, Arg0));
1536  }
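 
  // Illustrative note (not in the original source): with UnsafeFPMath or
  // NoNaNsFPMath set (roughly -ffast-math / -ffinite-math-only),
  //
  //   double r = sqrt(x);   //  ->  %r = call double @llvm.sqrt.f64(double %x)
  //
  // Otherwise the plain libm call is kept, so sqrt(-1.0) still sets errno.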
1537 
1538  case Builtin::BI__builtin_pow:
1539  case Builtin::BI__builtin_powf:
1540  case Builtin::BI__builtin_powl:
1541  case Builtin::BIpow:
1542  case Builtin::BIpowf:
1543  case Builtin::BIpowl: {
1544  // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1545  if (!FD->hasAttr<ConstAttr>())
1546  break;
1547  Value *Base = EmitScalarExpr(E->getArg(0));
1548  Value *Exponent = EmitScalarExpr(E->getArg(1));
1549  llvm::Type *ArgType = Base->getType();
1550  Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1551  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1552  }
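 
  // Illustrative note (not in the original source): e.g. powf(x, y), when the
  // declaration carries the const attribute (errno is ignored), becomes
  //   %r = call float @llvm.pow.f32(float %x, float %y)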
1553 
1554  case Builtin::BIfma:
1555  case Builtin::BIfmaf:
1556  case Builtin::BIfmal:
1557  case Builtin::BI__builtin_fma:
1558  case Builtin::BI__builtin_fmaf:
1559  case Builtin::BI__builtin_fmal: {
1560  // Rewrite fma to intrinsic.
1561  Value *FirstArg = EmitScalarExpr(E->getArg(0));
1562  llvm::Type *ArgType = FirstArg->getType();
1563  Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1564  return RValue::get(
1565  Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1566  EmitScalarExpr(E->getArg(2))}));
1567  }
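 
  // Illustrative note (not in the original source): fma(a, b, c) becomes
  //   %r = call double @llvm.fma.f64(double %a, double %b, double %c)
  // i.e. a single fused multiply-add with no intermediate rounding.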
1568 
1569  case Builtin::BI__builtin_signbit:
1570  case Builtin::BI__builtin_signbitf:
1571  case Builtin::BI__builtin_signbitl: {
1572  return RValue::get(
1573  Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1574  ConvertType(E->getType())));
1575  }
1576  case Builtin::BI__builtin_annotation: {
1577  llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1578  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1579  AnnVal->getType());
1580 
1581  // Get the annotation string, go through casts. Sema requires this to be a
1582  // non-wide string literal, potentially casted, so the cast<> is safe.
1583  const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1584  StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1585  return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1586  }
1587  case Builtin::BI__builtin_addcb:
1588  case Builtin::BI__builtin_addcs:
1589  case Builtin::BI__builtin_addc:
1590  case Builtin::BI__builtin_addcl:
1591  case Builtin::BI__builtin_addcll:
1592  case Builtin::BI__builtin_subcb:
1593  case Builtin::BI__builtin_subcs:
1594  case Builtin::BI__builtin_subc:
1595  case Builtin::BI__builtin_subcl:
1596  case Builtin::BI__builtin_subcll: {
1597 
1598  // We translate all of these builtins from expressions of the form:
1599  // int x = ..., y = ..., carryin = ..., carryout, result;
1600  // result = __builtin_addc(x, y, carryin, &carryout);
1601  //
1602  // to LLVM IR of the form:
1603  //
1604  // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1605  // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1606  // %carry1 = extractvalue {i32, i1} %tmp1, 1
1607  // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1608  // i32 %carryin)
1609  // %result = extractvalue {i32, i1} %tmp2, 0
1610  // %carry2 = extractvalue {i32, i1} %tmp2, 1
1611  // %tmp3 = or i1 %carry1, %carry2
1612  // %tmp4 = zext i1 %tmp3 to i32
1613  // store i32 %tmp4, i32* %carryout
1614 
1615  // Scalarize our inputs.
1616  llvm::Value *X = EmitScalarExpr(E->getArg(0));
1617  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1618  llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1619  Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1620 
1621  // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1622  llvm::Intrinsic::ID IntrinsicId;
1623  switch (BuiltinID) {
1624  default: llvm_unreachable("Unknown multiprecision builtin id.");
1625  case Builtin::BI__builtin_addcb:
1626  case Builtin::BI__builtin_addcs:
1627  case Builtin::BI__builtin_addc:
1628  case Builtin::BI__builtin_addcl:
1629  case Builtin::BI__builtin_addcll:
1630  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1631  break;
1632  case Builtin::BI__builtin_subcb:
1633  case Builtin::BI__builtin_subcs:
1634  case Builtin::BI__builtin_subc:
1635  case Builtin::BI__builtin_subcl:
1636  case Builtin::BI__builtin_subcll:
1637  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1638  break;
1639  }
1640 
1641  // Construct our resulting LLVM IR expression.
1642  llvm::Value *Carry1;
1643  llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1644  X, Y, Carry1);
1645  llvm::Value *Carry2;
1646  llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1647  Sum1, Carryin, Carry2);
1648  llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1649  X->getType());
1650  Builder.CreateStore(CarryOut, CarryOutPtr);
1651  return RValue::get(Sum2);
1652  }
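 
  // Illustrative usage (not in the original source): chaining the carry to
  // add two 64-bit numbers held as 32-bit limbs:
  //
  //   unsigned lo, hi, c;
  //   lo = __builtin_addc(a_lo, b_lo, 0, &c);
  //   hi = __builtin_addc(a_hi, b_hi, c, &c);
  //
  // Each call maps onto the pair of @llvm.uadd.with.overflow calls shown in
  // the comment above, with the two carry bits OR'ed into the carry-out.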
1653 
1654  case Builtin::BI__builtin_add_overflow:
1655  case Builtin::BI__builtin_sub_overflow:
1656  case Builtin::BI__builtin_mul_overflow: {
1657  const clang::Expr *LeftArg = E->getArg(0);
1658  const clang::Expr *RightArg = E->getArg(1);
1659  const clang::Expr *ResultArg = E->getArg(2);
1660 
1661  clang::QualType ResultQTy =
1662  ResultArg->getType()->castAs<PointerType>()->getPointeeType();
1663 
1664  WidthAndSignedness LeftInfo =
1665  getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
1666  WidthAndSignedness RightInfo =
1667  getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
1668  WidthAndSignedness ResultInfo =
1669  getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
1670  WidthAndSignedness EncompassingInfo =
1671  EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
1672 
1673  llvm::Type *EncompassingLLVMTy =
1674  llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
1675 
1676  llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
1677 
1678  llvm::Intrinsic::ID IntrinsicId;
1679  switch (BuiltinID) {
1680  default:
1681  llvm_unreachable("Unknown overflow builtin id.");
1682  case Builtin::BI__builtin_add_overflow:
1683  IntrinsicId = EncompassingInfo.Signed
1684  ? llvm::Intrinsic::sadd_with_overflow
1685  : llvm::Intrinsic::uadd_with_overflow;
1686  break;
1687  case Builtin::BI__builtin_sub_overflow:
1688  IntrinsicId = EncompassingInfo.Signed
1689  ? llvm::Intrinsic::ssub_with_overflow
1690  : llvm::Intrinsic::usub_with_overflow;
1691  break;
1692  case Builtin::BI__builtin_mul_overflow:
1693  IntrinsicId = EncompassingInfo.Signed
1694  ? llvm::Intrinsic::smul_with_overflow
1695  : llvm::Intrinsic::umul_with_overflow;
1696  break;
1697  }
1698 
1699  llvm::Value *Left = EmitScalarExpr(LeftArg);
1700  llvm::Value *Right = EmitScalarExpr(RightArg);
1701  Address ResultPtr = EmitPointerWithAlignment(ResultArg);
1702 
1703  // Extend each operand to the encompassing type.
1704  Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
1705  Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
1706 
1707  // Perform the operation on the extended values.
1708  llvm::Value *Overflow, *Result;
1709  Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
1710 
1711  if (EncompassingInfo.Width > ResultInfo.Width) {
1712  // The encompassing type is wider than the result type, so we need to
1713  // truncate it.
1714  llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
1715 
1716  // To see if the truncation caused an overflow, we will extend
1717  // the result and then compare it to the original result.
1718  llvm::Value *ResultTruncExt = Builder.CreateIntCast(
1719  ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
1720  llvm::Value *TruncationOverflow =
1721  Builder.CreateICmpNE(Result, ResultTruncExt);
1722 
1723  Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
1724  Result = ResultTrunc;
1725  }
1726 
1727  // Finally, store the result using the pointer.
1728  bool isVolatile =
1729  ResultArg->getType()->getPointeeType().isVolatileQualified();
1730  Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
1731 
1732  return RValue::get(Overflow);
1733  }
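 
  // Illustrative note (not in the original source): the encompassing type
  // lets the operand and result types differ freely. For example,
  //
  //   long long a, b; int r;
  //   _Bool ovf = __builtin_add_overflow(a, b, &r);
  //
  // computes the sum at 64 bits via @llvm.sadd.with.overflow.i64, then ORs in
  // an extra overflow bit if truncating the sum to 'int' changes its value
  // (the trunc/extend/compare sequence above).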
1734 
1735  case Builtin::BI__builtin_uadd_overflow:
1736  case Builtin::BI__builtin_uaddl_overflow:
1737  case Builtin::BI__builtin_uaddll_overflow:
1738  case Builtin::BI__builtin_usub_overflow:
1739  case Builtin::BI__builtin_usubl_overflow:
1740  case Builtin::BI__builtin_usubll_overflow:
1741  case Builtin::BI__builtin_umul_overflow:
1742  case Builtin::BI__builtin_umull_overflow:
1743  case Builtin::BI__builtin_umulll_overflow:
1744  case Builtin::BI__builtin_sadd_overflow:
1745  case Builtin::BI__builtin_saddl_overflow:
1746  case Builtin::BI__builtin_saddll_overflow:
1747  case Builtin::BI__builtin_ssub_overflow:
1748  case Builtin::BI__builtin_ssubl_overflow:
1749  case Builtin::BI__builtin_ssubll_overflow:
1750  case Builtin::BI__builtin_smul_overflow:
1751  case Builtin::BI__builtin_smull_overflow:
1752  case Builtin::BI__builtin_smulll_overflow: {
1753 
1754  // We translate all of these builtins directly to the relevant llvm IR node.
1755 
1756  // Scalarize our inputs.
1757  llvm::Value *X = EmitScalarExpr(E->getArg(0));
1758  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1759  Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
1760 
1761  // Decide which of the overflow intrinsics we are lowering to:
1762  llvm::Intrinsic::ID IntrinsicId;
1763  switch (BuiltinID) {
1764  default: llvm_unreachable("Unknown overflow builtin id.");
1765  case Builtin::BI__builtin_uadd_overflow:
1766  case Builtin::BI__builtin_uaddl_overflow:
1767  case Builtin::BI__builtin_uaddll_overflow:
1768  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1769  break;
1770  case Builtin::BI__builtin_usub_overflow:
1771  case Builtin::BI__builtin_usubl_overflow:
1772  case Builtin::BI__builtin_usubll_overflow:
1773  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1774  break;
1775  case Builtin::BI__builtin_umul_overflow:
1776  case Builtin::BI__builtin_umull_overflow:
1777  case Builtin::BI__builtin_umulll_overflow:
1778  IntrinsicId = llvm::Intrinsic::umul_with_overflow;
1779  break;
1780  case Builtin::BI__builtin_sadd_overflow:
1781  case Builtin::BI__builtin_saddl_overflow:
1782  case Builtin::BI__builtin_saddll_overflow:
1783  IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
1784  break;
1785  case Builtin::BI__builtin_ssub_overflow:
1786  case Builtin::BI__builtin_ssubl_overflow:
1787  case Builtin::BI__builtin_ssubll_overflow:
1788  IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
1789  break;
1790  case Builtin::BI__builtin_smul_overflow:
1791  case Builtin::BI__builtin_smull_overflow:
1792  case Builtin::BI__builtin_smulll_overflow:
1793  IntrinsicId = llvm::Intrinsic::smul_with_overflow;
1794  break;
1795  }
1796 
1797 
1798  llvm::Value *Carry;
1799  llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
1800  Builder.CreateStore(Sum, SumOutPtr);
1801 
1802  return RValue::get(Carry);
1803  }
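  // Illustrative usage (not in the original source):
  //
  //   unsigned sum;
  //   if (__builtin_uadd_overflow(x, y, &sum))   // sum = x + y, returns carry
  //     handle_overflow();
  //
  // This maps directly onto @llvm.uadd.with.overflow.i32 plus a store of the
  // sum; the builtin's own result is the overflow flag.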
1804  case Builtin::BI__builtin_addressof:
1805  return RValue::get(EmitLValue(E->getArg(0)).getPointer());
1806  case Builtin::BI__builtin_operator_new:
1807  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1808  E->getArg(0), false);
1809  case Builtin::BI__builtin_operator_delete:
1810  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1811  E->getArg(0), true);
1812  case Builtin::BI__noop:
1813  // __noop always evaluates to an integer literal zero.
1814  return RValue::get(ConstantInt::get(IntTy, 0));
1815  case Builtin::BI__builtin_call_with_static_chain: {
1816  const CallExpr *Call = cast<CallExpr>(E->getArg(0));
1817  const Expr *Chain = E->getArg(1);
1818  return EmitCall(Call->getCallee()->getType(),
1819  EmitScalarExpr(Call->getCallee()), Call, ReturnValue,
1820  Call->getCalleeDecl(), EmitScalarExpr(Chain));
1821  }
1822  case Builtin::BI_InterlockedExchange:
1823  case Builtin::BI_InterlockedExchangePointer:
1824  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1825  case Builtin::BI_InterlockedCompareExchangePointer: {
1826  llvm::Type *RTy;
1827  llvm::IntegerType *IntType =
1828  IntegerType::get(getLLVMContext(),
1829  getContext().getTypeSize(E->getType()));
1830  llvm::Type *IntPtrType = IntType->getPointerTo();
1831 
1832  llvm::Value *Destination =
1833  Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
1834 
1835  llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
1836  RTy = Exchange->getType();
1837  Exchange = Builder.CreatePtrToInt(Exchange, IntType);
1838 
1839  llvm::Value *Comparand =
1840  Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
1841 
1842  auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
1843  SequentiallyConsistent,
1844  SequentiallyConsistent);
1845  Result->setVolatile(true);
1846 
1847  return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
1848  0),
1849  RTy));
1850  }
1851  case Builtin::BI_InterlockedCompareExchange: {
1852  AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
1853  EmitScalarExpr(E->getArg(0)),
1854  EmitScalarExpr(E->getArg(2)),
1855  EmitScalarExpr(E->getArg(1)),
1856  SequentiallyConsistent,
1857  SequentiallyConsistent);
1858  CXI->setVolatile(true);
1859  return RValue::get(Builder.CreateExtractValue(CXI, 0));
1860  }
1861  case Builtin::BI_InterlockedIncrement: {
1862  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1863  AtomicRMWInst::Add,
1864  EmitScalarExpr(E->getArg(0)),
1865  ConstantInt::get(Int32Ty, 1),
1866  llvm::SequentiallyConsistent);
1867  RMWI->setVolatile(true);
1868  return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1)));
1869  }
1870  case Builtin::BI_InterlockedDecrement: {
1871  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1872  AtomicRMWInst::Sub,
1873  EmitScalarExpr(E->getArg(0)),
1874  ConstantInt::get(Int32Ty, 1),
1875  llvm::SequentiallyConsistent);
1876  RMWI->setVolatile(true);
1877  return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1)));
1878  }
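  // Illustrative note (not in the original source): atomicrmw yields the
  // value the memory held *before* the operation, while MSVC's
  // _InterlockedIncrement/_InterlockedDecrement return the *new* value,
  // hence the extra CreateAdd/CreateSub of 1 after each RMW above.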
1879  case Builtin::BI_InterlockedExchangeAdd: {
1880  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1881  AtomicRMWInst::Add,
1882  EmitScalarExpr(E->getArg(0)),
1883  EmitScalarExpr(E->getArg(1)),
1884  llvm::SequentiallyConsistent);
1885  RMWI->setVolatile(true);
1886  return RValue::get(RMWI);
1887  }
1888  case Builtin::BI__readfsdword: {
1889  Value *IntToPtr =
1890  Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
1891  llvm::PointerType::get(CGM.Int32Ty, 257));
1892  LoadInst *Load =
1893  Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true);
1894  return RValue::get(Load);
1895  }
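  // Illustrative note (not in the original source): on x86 targets LLVM
  // reserves address space 257 for FS-relative addressing (256 is GS), so the
  // volatile load above becomes an FS-segment load, roughly
  //   mov eax, dword ptr fs:[offset]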
1896 
1897  case Builtin::BI__exception_code:
1898  case Builtin::BI_exception_code:
1899  return RValue::get(EmitSEHExceptionCode());
1900  case Builtin::BI__exception_info:
1901  case Builtin::BI_exception_info:
1902  return RValue::get(EmitSEHExceptionInfo());
1903  case Builtin::BI__abnormal_termination:
1904  case Builtin::BI_abnormal_termination:
1905  return RValue::get(EmitSEHAbnormalTermination());
1906  case Builtin::BI_setjmpex: {
1907  if (getTarget().getTriple().isOSMSVCRT()) {
1908  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
1909  llvm::AttributeSet ReturnsTwiceAttr =
1910  AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
1911  llvm::Attribute::ReturnsTwice);
1912  llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
1913  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
1914  "_setjmpex", ReturnsTwiceAttr);
1915  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
1916  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
1917  llvm::Value *FrameAddr =
1918  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1919  ConstantInt::get(Int32Ty, 0));
1920  llvm::Value *Args[] = {Buf, FrameAddr};
1921  llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
1922  CS.setAttributes(ReturnsTwiceAttr);
1923  return RValue::get(CS.getInstruction());
1924  }
1925  break;
1926  }
1927  case Builtin::BI_setjmp: {
1928  if (getTarget().getTriple().isOSMSVCRT()) {
1929  llvm::AttributeSet ReturnsTwiceAttr =
1930  AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
1931  llvm::Attribute::ReturnsTwice);
1932  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
1933  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
1934  llvm::CallSite CS;
1935  if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
1936  llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
1937  llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
1938  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
1939  "_setjmp3", ReturnsTwiceAttr);
1940  llvm::Value *Count = ConstantInt::get(IntTy, 0);
1941  llvm::Value *Args[] = {Buf, Count};
1942  CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
1943  } else {
1944  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
1945  llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
1946  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
1947  "_setjmp", ReturnsTwiceAttr);
1948  llvm::Value *FrameAddr =
1949  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1950  ConstantInt::get(Int32Ty, 0));
1951  llvm::Value *Args[] = {Buf, FrameAddr};
1952  CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
1953  }
1954  CS.setAttributes(ReturnsTwiceAttr);
1955  return RValue::get(CS.getInstruction());
1956  }
1957  break;
1958  }
1959 
1960  case Builtin::BI__GetExceptionInfo: {
1961  if (llvm::GlobalVariable *GV =
1962  CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
1963  return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
1964  break;
1965  }
1966  }
1967 
1968  // If this is an alias for a lib function (e.g. __builtin_sin), emit
1969  // the call using the normal call path, but using the unmangled
1970  // version of the function name.
1971  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
1972  return emitLibraryCall(*this, FD, E,
1973  CGM.getBuiltinLibFunction(FD, BuiltinID));
1974 
1975  // If this is a predefined lib function (e.g. malloc), emit the call
1976  // using exactly the normal call path.
1977  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
1978  return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
1979 
 1980  // Check that a call to a target specific builtin has the correct target
 1981  // features.
 1982  // This is done down here so that non-target-specific builtins skip the
 1983  // check; if generic builtins ever start to require generic target
 1984  // features, this can move up to the beginning of the function.
1985  checkTargetFeatures(E, FD);
1986 
1987  // See if we have a target specific intrinsic.
1988  const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
1989  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
1990  if (const char *Prefix =
1991  llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) {
1992  IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
 1993  // NOTE: we don't need to perform a compatibility flag check here, since
 1994  // the intrinsics are declared in Builtins*.def via LANGBUILTIN, which
 1995  // filters the MS builtins via ALL_MS_LANGUAGES earlier on.
1996  if (IntrinsicID == Intrinsic::not_intrinsic)
1997  IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name);
1998  }
1999 
2000  if (IntrinsicID != Intrinsic::not_intrinsic) {
 2001  SmallVector<Value*, 16> Args;
 2002 
2003  // Find out if any arguments are required to be integer constant
2004  // expressions.
2005  unsigned ICEArguments = 0;
 2006  ASTContext::GetBuiltinTypeError Error;
 2007  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2008  assert(Error == ASTContext::GE_None && "Should not codegen an error");
2009 
2010  Function *F = CGM.getIntrinsic(IntrinsicID);
2011  llvm::FunctionType *FTy = F->getFunctionType();
2012 
2013  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2014  Value *ArgValue;
2015  // If this is a normal argument, just emit it as a scalar.
2016  if ((ICEArguments & (1 << i)) == 0) {
2017  ArgValue = EmitScalarExpr(E->getArg(i));
2018  } else {
2019  // If this is required to be a constant, constant fold it so that we
2020  // know that the generated intrinsic gets a ConstantInt.
2021  llvm::APSInt Result;
2022  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2023  assert(IsConst && "Constant arg isn't actually constant?");
2024  (void)IsConst;
2025  ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2026  }
2027 
2028  // If the intrinsic arg type is different from the builtin arg type
2029  // we need to do a bit cast.
2030  llvm::Type *PTy = FTy->getParamType(i);
2031  if (PTy != ArgValue->getType()) {
2032  assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2033  "Must be able to losslessly bit cast to param");
2034  ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2035  }
2036 
2037  Args.push_back(ArgValue);
2038  }
2039 
2040  Value *V = Builder.CreateCall(F, Args);
2041  QualType BuiltinRetType = E->getType();
2042 
2043  llvm::Type *RetTy = VoidTy;
2044  if (!BuiltinRetType->isVoidType())
2045  RetTy = ConvertType(BuiltinRetType);
2046 
2047  if (RetTy != V->getType()) {
2048  assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2049  "Must be able to losslessly bit cast result type");
2050  V = Builder.CreateBitCast(V, RetTy);
2051  }
2052 
2053  return RValue::get(V);
2054  }
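 
  // Illustrative note (not in the original source): the ICEArguments bitmask
  // comes from the builtin's prototype string in Builtins*.def, where an 'I'
  // marks a parameter that must be an integer constant expression; the loop
  // above constant-folds such arguments so the target intrinsic receives a
  // genuine ConstantInt immediate.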
2055 
2056  // See if we have a target specific builtin that needs to be lowered.
2057  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2058  return RValue::get(V);
2059 
2060  ErrorUnsupported(E, "builtin function");
2061 
 2062  // Unknown builtin: we have already reported it as unsupported; return undef.
2063  return GetUndefRValue(E->getType());
2064 }
2065 
 2066 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
 2067  unsigned BuiltinID, const CallExpr *E,
2068  llvm::Triple::ArchType Arch) {
2069  switch (Arch) {
2070  case llvm::Triple::arm:
2071  case llvm::Triple::armeb:
2072  case llvm::Triple::thumb:
2073  case llvm::Triple::thumbeb:
2074  return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2075  case llvm::Triple::aarch64:
2076  case llvm::Triple::aarch64_be:
2077  return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2078  case llvm::Triple::x86:
2079  case llvm::Triple::x86_64:
2080  return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2081  case llvm::Triple::ppc:
2082  case llvm::Triple::ppc64:
2083  case llvm::Triple::ppc64le:
2084  return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2085  case llvm::Triple::r600:
2086  case llvm::Triple::amdgcn:
2087  return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2088  case llvm::Triple::systemz:
2089  return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2090  case llvm::Triple::nvptx:
2091  case llvm::Triple::nvptx64:
2092  return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2093  case llvm::Triple::wasm32:
2094  case llvm::Triple::wasm64:
2095  return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2096  default:
2097  return nullptr;
2098  }
2099 }
2100 
2101 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2102  const CallExpr *E) {
2103  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2104  assert(getContext().getAuxTargetInfo() && "Missing aux target info");
 2105  return EmitTargetArchBuiltinExpr(
 2106  this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2107  getContext().getAuxTargetInfo()->getTriple().getArch());
2108  }
2109 
2110  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2111  getTarget().getTriple().getArch());
2112 }
2113 
2114 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2115  NeonTypeFlags TypeFlags,
2116  bool V1Ty=false) {
2117  int IsQuad = TypeFlags.isQuad();
2118  switch (TypeFlags.getEltType()) {
2119  case NeonTypeFlags::Int8:
2120  case NeonTypeFlags::Poly8:
2121  return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2122  case NeonTypeFlags::Int16:
2123  case NeonTypeFlags::Poly16:
 2124  case NeonTypeFlags::Float16:
 2125  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2126  case NeonTypeFlags::Int32:
2127  return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2128  case NeonTypeFlags::Int64:
2129  case NeonTypeFlags::Poly64:
2130  return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
 2131  case NeonTypeFlags::Poly128:
 2132  // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
 2133  // much of the i128 and f128 API is missing, so we use v16i8 to
 2134  // represent poly128 and rely on pattern matching.
2135  return llvm::VectorType::get(CGF->Int8Ty, 16);
 2136  case NeonTypeFlags::Float32:
 2137  return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
 2138  case NeonTypeFlags::Float64:
 2139  return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2140  }
2141  llvm_unreachable("Unknown vector element type!");
2142 }
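 
 // Illustrative note (not in the original source): NeonTypeFlags packs an
 // element type with a "quad" bit selecting 64- vs 128-bit vectors. E.g.
 // {Int32, !quad} yields <2 x i32> and {Int32, quad} yields <4 x i32>, via
 // the (2 << IsQuad) computation above.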
2143 
2144 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2145  NeonTypeFlags IntTypeFlags) {
2146  int IsQuad = IntTypeFlags.isQuad();
2147  switch (IntTypeFlags.getEltType()) {
2148  case NeonTypeFlags::Int32:
2149  return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2150  case NeonTypeFlags::Int64:
2151  return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2152  default:
2153  llvm_unreachable("Type can't be converted to floating-point!");
2154  }
2155 }
2156 
 2157 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
 2158  unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
2159  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2160  return Builder.CreateShuffleVector(V, V, SV, "lane");
2161 }
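 
 // Illustrative note (not in the original source): splatting lane 1 of a
 // <4 x i32> value %v emits
 //   shufflevector <4 x i32> %v, <4 x i32> %v,
 //                 <4 x i32> <i32 1, i32 1, i32 1, i32 1>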
2162 
 2163 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
 2164  const char *name,
2165  unsigned shift, bool rightshift) {
2166  unsigned j = 0;
2167  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2168  ai != ae; ++ai, ++j)
2169  if (shift > 0 && shift == j)
2170  Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
2171  else
2172  Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
2173 
2174  return Builder.CreateCall(F, Ops, name);
2175 }
2176 
 2177 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
 2178  bool neg) {
2179  int SV = cast<ConstantInt>(V)->getSExtValue();
2180  return ConstantInt::get(Ty, neg ? -SV : SV);
2181 }
2182 
2183 // \brief Right-shift a vector by a constant.
 2184 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
 2185  llvm::Type *Ty, bool usgn,
2186  const char *name) {
2187  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2188 
2189  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
2190  int EltSize = VTy->getScalarSizeInBits();
2191 
2192  Vec = Builder.CreateBitCast(Vec, Ty);
2193 
2194  // lshr/ashr are undefined when the shift amount is equal to the vector
2195  // element size.
2196  if (ShiftAmt == EltSize) {
2197  if (usgn) {
2198  // Right-shifting an unsigned value by its size yields 0.
2199  return llvm::ConstantAggregateZero::get(VTy);
2200  } else {
2201  // Right-shifting a signed value by its size is equivalent
2202  // to a shift of size-1.
2203  --ShiftAmt;
2204  Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
2205  }
2206  }
2207 
2208  Shift = EmitNeonShiftVector(Shift, Ty, false);
2209  if (usgn)
2210  return Builder.CreateLShr(Vec, Shift, name);
2211  else
2212  return Builder.CreateAShr(Vec, Shift, name);
2213 }
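 
 // Illustrative note (not in the original source): e.g. a signed right shift
 // by 32 on <2 x i32> elements would be undefined as an IR 'ashr', so the
 // code above clamps it to a shift by 31, which produces the same
 // all-sign-bits result; the unsigned case folds directly to a zero vector.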
2214 
2215 enum {
2216  AddRetType = (1 << 0),
2217  Add1ArgType = (1 << 1),
2218  Add2ArgTypes = (1 << 2),
2219 
2220  VectorizeRetType = (1 << 3),
2221  VectorizeArgTypes = (1 << 4),
2222 
2223  InventFloatType = (1 << 5),
2224  UnsignedAlts = (1 << 6),
2225 
2226  Use64BitVectors = (1 << 7),
2227  Use128BitVectors = (1 << 8),
2228 
 2228 
 2229  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
 2230  VectorRet = AddRetType | VectorizeRetType,
 2231  VectorRetGetArgs01 =
 2232  AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
 2233  FpCmpzModifiers =
 2234  AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
 2235 };
2236 
2237 namespace {
2238 struct NeonIntrinsicInfo {
2239  const char *NameHint;
2240  unsigned BuiltinID;
2241  unsigned LLVMIntrinsic;
2242  unsigned AltLLVMIntrinsic;
2243  unsigned TypeModifier;
2244 
2245  bool operator<(unsigned RHSBuiltinID) const {
2246  return BuiltinID < RHSBuiltinID;
2247  }
2248  bool operator<(const NeonIntrinsicInfo &TE) const {
2249  return BuiltinID < TE.BuiltinID;
2250  }
2251 };
2252 } // end anonymous namespace
2253 
2254 #define NEONMAP0(NameBase) \
2255  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
2256 
2257 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
2258  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
2259  Intrinsic::LLVMIntrinsic, 0, TypeModifier }
2260 
2261 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
2262  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
2263  Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
2264  TypeModifier }
2265 
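 // Illustrative note (not in the original source): a table entry such as
 //   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType)
 // expands to
 //   { "vcls_v", NEON::BI__builtin_neon_vcls_v,
 //     Intrinsic::arm_neon_vcls, 0, Add1ArgType }
 // The maps below are kept sorted by builtin ID so that, with the operator<
 // overloads defined above, an entry can be found by binary search.
 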
2266 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
2267  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2268  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2269  NEONMAP1(vabs_v, arm_neon_vabs, 0),
2270  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
2271  NEONMAP0(vaddhn_v),
2272  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
2273  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
2274  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
2275  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
2276  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
2277  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
2278  NEONMAP1(vcage_v, arm_neon_vacge, 0),
2279  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
2280  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
2281  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
2282  NEONMAP1(vcale_v, arm_neon_vacge, 0),
2283  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
2284  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
2285  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
2286  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
2287  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
2288  NEONMAP1(vclz_v, ctlz, Add1ArgType),
2289  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2290  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2291  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2292  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
2293  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
2294  NEONMAP0(vcvt_f32_v),
2295  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2296  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2297  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2298  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2299  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2300  NEONMAP0(vcvt_s32_v),
2301  NEONMAP0(vcvt_s64_v),
2302  NEONMAP0(vcvt_u32_v),
2303  NEONMAP0(vcvt_u64_v),
2304  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
2305  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
2306  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
2307  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
2308  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
2309  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
2310  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
2311  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
2312  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
2313  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
2314  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
2315  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
2316  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
2317  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
2318  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
2319  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
2320  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
2321  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
2322  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
2323  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
2324  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
2325  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
2326  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
2327  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
2328  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
2329  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
2330  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
2331  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
2332  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
2333  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
2334  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
2335  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
2336  NEONMAP0(vcvtq_f32_v),
2337  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2338  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2339  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2340  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2341  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2342  NEONMAP0(vcvtq_s32_v),
2343  NEONMAP0(vcvtq_s64_v),
2344  NEONMAP0(vcvtq_u32_v),
2345  NEONMAP0(vcvtq_u64_v),
2346  NEONMAP0(vext_v),
2347  NEONMAP0(vextq_v),
2348  NEONMAP0(vfma_v),
2349  NEONMAP0(vfmaq_v),
2350  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2351  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2352  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2353  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2354  NEONMAP0(vld1_dup_v),
2355  NEONMAP1(vld1_v, arm_neon_vld1, 0),
2356  NEONMAP0(vld1q_dup_v),
2357  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
2358  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
2359  NEONMAP1(vld2_v, arm_neon_vld2, 0),
2360  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
2361  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
2362  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
2363  NEONMAP1(vld3_v, arm_neon_vld3, 0),
2364  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
2365  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
2366  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
2367  NEONMAP1(vld4_v, arm_neon_vld4, 0),
2368  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
2369  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
2370  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2371  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
2372  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
2373  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2374  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2375  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
2376  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
2377  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2378  NEONMAP0(vmovl_v),
2379  NEONMAP0(vmovn_v),
2380  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
2381  NEONMAP0(vmull_v),
2382  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
2383  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2384  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2385  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
2386  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2387  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2388  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
2389  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
2390  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
2391  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
2392  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
2393  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2394  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2395  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
2396  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
2397  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
2398  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
2399  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
2400  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
2401  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
2402  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
2403  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
2404  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
2405  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
2406  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2407  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2408  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2409  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2410  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2411  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2412  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
2413  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
2414  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2415  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2416  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
2417  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2418  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2419  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
2420  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
2421  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2422  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2423  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
2424  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
2425  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
2426  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
2427  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
2428  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
2429  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
2430  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
2431  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
2432  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
2433  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
2434  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
2435  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2436  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2437  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2438  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2439  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2440  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2441  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
2442  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
2443  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
2444  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
2445  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
2446  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
2447  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
2448  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
2449  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
2450  NEONMAP0(vshl_n_v),
2451  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2452  NEONMAP0(vshll_n_v),
2453  NEONMAP0(vshlq_n_v),
2454  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2455  NEONMAP0(vshr_n_v),
2456  NEONMAP0(vshrn_n_v),
2457  NEONMAP0(vshrq_n_v),
2458  NEONMAP1(vst1_v, arm_neon_vst1, 0),
2459  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
2460  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
2461  NEONMAP1(vst2_v, arm_neon_vst2, 0),
2462  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
2463  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
2464  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
2465  NEONMAP1(vst3_v, arm_neon_vst3, 0),
2466  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
2467  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
2468  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
2469  NEONMAP1(vst4_v, arm_neon_vst4, 0),
2470  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
2471  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
2472  NEONMAP0(vsubhn_v),
2473  NEONMAP0(vtrn_v),
2474  NEONMAP0(vtrnq_v),
2475  NEONMAP0(vtst_v),
2476  NEONMAP0(vtstq_v),
2477  NEONMAP0(vuzp_v),
2478  NEONMAP0(vuzpq_v),
2479  NEONMAP0(vzip_v),
2480  NEONMAP0(vzipq_v)
2481 };
2482 
2483 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
2484  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
2485  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
2486  NEONMAP0(vaddhn_v),
2487  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
2488  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
2489  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
2490  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
2491  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
2492  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
2493  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
2494  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
2495  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
2496  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
2497  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
2498  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
2499  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
2500  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
2501  NEONMAP1(vclz_v, ctlz, Add1ArgType),
2502  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2503  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2504  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2505  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
2506  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
2507  NEONMAP0(vcvt_f32_v),
2508  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2509  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2510  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2511  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2512  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2513  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2514  NEONMAP0(vcvtq_f32_v),
2515  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2516  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2517  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2518  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2519  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2520  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2521  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
2522  NEONMAP0(vext_v),
2523  NEONMAP0(vextq_v),
2524  NEONMAP0(vfma_v),
2525  NEONMAP0(vfmaq_v),
2526  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2527  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2528  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2529  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2530  NEONMAP0(vmovl_v),
2531  NEONMAP0(vmovn_v),
2532  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
2533  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
2534  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
2535  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2536  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2537  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
2538  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
2539  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
2540  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
2541  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
2542  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
2543  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
2544  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
2545  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
2546  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
2547  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
2548  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
2549  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
2550  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
2551  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
2552  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
2553  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2554  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2555  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
2556  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
 2557  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
2558  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
2559  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
2560  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
2561  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2562  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2563  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
2564  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2565  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2566  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
2567  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
2568  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2569  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2570  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
2571  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
2572  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2573  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2574  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2575  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2576  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
2577  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
2578  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
2579  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
2580  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
2581  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
2582  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
2583  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
2584  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
2585  NEONMAP0(vshl_n_v),
2586  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
2587  NEONMAP0(vshll_n_v),
2588  NEONMAP0(vshlq_n_v),
2589  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
2590  NEONMAP0(vshr_n_v),
2591  NEONMAP0(vshrn_n_v),
2592  NEONMAP0(vshrq_n_v),
2593  NEONMAP0(vsubhn_v),
2594  NEONMAP0(vtst_v),
2595  NEONMAP0(vtstq_v),
2596 };
2597 
2598 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
2599  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
2600  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
2601  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
2602  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
2603  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
2604  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
2605  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
2606  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
2607  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
2608  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2609  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
2610  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
2611  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
2612  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
2613  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2614  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2615  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
2616  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
2617  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
2618  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
2619  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
2620  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
2621  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
2622  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
2623  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
2624  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
2625  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
2626  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
2627  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
2628  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
2629  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
2630  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
2631  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
2632  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
2633  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
2634  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
2635  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
2636  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
2637  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
2638  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
2639  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
2640  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
2641  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
2642  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
2643  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
2644  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
2645  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
2646  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
2647  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
2648  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2649  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2650  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2651  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2652  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
2653  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
2654  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2655  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2656  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
2657  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
2658  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2659  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2660  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2661  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
2662  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
2663  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
2664  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
2665  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
2666  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
2667  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
2668  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
2669  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
2670  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
2671  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2672  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
2673  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2674  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
2675  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2676  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
2677  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2678  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
2679  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
2680  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
2681  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
2682  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
2683  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
2684  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
2685  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
2686  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
2687  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
2688  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
2689  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
2690  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
2691  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
2692  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
2693  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
2694  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
2695  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
2696  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
2697  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
2698  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
2699  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
2700  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
2701  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
2702  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
2703  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
2704  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
2705  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
2706  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
2707  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
2708  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
2709  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
2710  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
2711  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
2712  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
2713  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
2714  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
2715  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
2716  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
2717  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
2718  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
2719  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
2720  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
2721  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
2722  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
2723  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
2724  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
2725  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
2726  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
2727  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
2728  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
2729  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
2730  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
2731  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
2732  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
2733  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
2734  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
2735  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
2736  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
2737  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
2738  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
2739  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
2740  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
2741  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
2742  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
2743  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
2744  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
2745  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
2746  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
2747  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
2748  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
2749  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
2750  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
2751  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
2752  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
2753  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
2754  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
2755  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
2756  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
2757  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
2758  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
2759  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
2760  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
2761  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
2762  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
2763  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
2764  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
2765  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
2766  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
2767  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
2768  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
2769  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
2770  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
2771  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
2772  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
2773  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
2774  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
2775  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
2776  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
2777  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
2778  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
2779  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
2780  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
2781  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
2782  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
2783  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
2784  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
2785  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
2786  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
2787  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
2788  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
2789  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
2790  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
2791 };
2792 
2793 #undef NEONMAP0
2794 #undef NEONMAP1
2795 #undef NEONMAP2
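// Each NEONMAPn entry above pairs one NEON builtin with up to n LLVM
// intrinsics plus a TypeModifier bitmask. The tables must stay sorted by
// builtin ID, since findNeonIntrinsicInMap below binary-searches them.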
2796 
2797 static bool NEONSIMDIntrinsicsProvenSorted = false;
2798 
2799 static bool AArch64SIMDIntrinsicsProvenSorted = false;
2800 static bool AArch64SISDIntrinsicsProvenSorted = false;
2801 
2802 
2803 static const NeonIntrinsicInfo *
2804 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
2805  unsigned BuiltinID, bool &MapProvenSorted) {
2806 
2807 #ifndef NDEBUG
2808  if (!MapProvenSorted) {
2809  assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
2810  MapProvenSorted = true;
2811  }
2812 #endif
2813 
2814  const NeonIntrinsicInfo *Builtin =
2815  std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
2816 
2817  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
2818  return Builtin;
2819 
2820  return nullptr;
2821 }
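// For example, looking up NEON::BI__builtin_neon_vqaddd_s64 here returns the
// NEONMAP1 entry above that maps it to aarch64_neon_sqadd with Add1ArgType.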
2822 
2823 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
2824  unsigned Modifier,
2825  llvm::Type *ArgType,
2826  const CallExpr *E) {
2827  int VectorSize = 0;
2828  if (Modifier & Use64BitVectors)
2829  VectorSize = 64;
2830  else if (Modifier & Use128BitVectors)
2831  VectorSize = 128;
2832 
2833  // Return type.
2834  SmallVector<llvm::Type *, 3> Tys;
2835  if (Modifier & AddRetType) {
2836  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
2837  if (Modifier & VectorizeRetType)
2838  Ty = llvm::VectorType::get(
2839  Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
2840 
2841  Tys.push_back(Ty);
2842  }
2843 
2844  // Arguments.
2845  if (Modifier & VectorizeArgTypes) {
2846  int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
2847  ArgType = llvm::VectorType::get(ArgType, Elts);
2848  }
2849 
2850  if (Modifier & (Add1ArgType | Add2ArgTypes))
2851  Tys.push_back(ArgType);
2852 
2853  if (Modifier & Add2ArgTypes)
2854  Tys.push_back(ArgType);
2855 
2856  if (Modifier & InventFloatType)
2857  Tys.push_back(FloatTy);
2858 
2859  return CGM.getIntrinsic(IntrinsicID, Tys);
2860 }
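// Illustration: with Modifier == (AddRetType | Add1ArgType), Tys becomes
// { ReturnTy, ArgTy } and the returned declaration is the intrinsic
// overloaded on both types; the Vectorize* bits instead widen scalar types
// to the 64- or 128-bit vectors selected by Use64BitVectors/Use128BitVectors.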
2861 
2862 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
2863  const NeonIntrinsicInfo &SISDInfo,
2864  SmallVectorImpl<Value *> &Ops,
2865  const CallExpr *E) {
2866  unsigned BuiltinID = SISDInfo.BuiltinID;
2867  unsigned int Int = SISDInfo.LLVMIntrinsic;
2868  unsigned Modifier = SISDInfo.TypeModifier;
2869  const char *s = SISDInfo.NameHint;
2870 
2871  switch (BuiltinID) {
2872  case NEON::BI__builtin_neon_vcled_s64:
2873  case NEON::BI__builtin_neon_vcled_u64:
2874  case NEON::BI__builtin_neon_vcles_f32:
2875  case NEON::BI__builtin_neon_vcled_f64:
2876  case NEON::BI__builtin_neon_vcltd_s64:
2877  case NEON::BI__builtin_neon_vcltd_u64:
2878  case NEON::BI__builtin_neon_vclts_f32:
2879  case NEON::BI__builtin_neon_vcltd_f64:
2880  case NEON::BI__builtin_neon_vcales_f32:
2881  case NEON::BI__builtin_neon_vcaled_f64:
2882  case NEON::BI__builtin_neon_vcalts_f32:
2883  case NEON::BI__builtin_neon_vcaltd_f64:
2884  // Only one direction of comparisons actually exists; cmle is actually a
2885  // cmge with swapped operands. The table gives us the right intrinsic, but
2886  // we still need to do the swap.
2887  std::swap(Ops[0], Ops[1]);
2888  break;
2889  }
2890 
2891  assert(Int && "Generic code assumes a valid intrinsic");
2892 
2893  // Determine the type(s) of this overloaded AArch64 intrinsic.
2894  const Expr *Arg = E->getArg(0);
2895  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
2896  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
2897 
2898  int j = 0;
2899  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
2900  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2901  ai != ae; ++ai, ++j) {
2902  llvm::Type *ArgTy = ai->getType();
2903  if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
2904  ArgTy->getPrimitiveSizeInBits())
2905  continue;
2906 
2907  assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
2908  // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
2909  // it before inserting.
2910  Ops[j] =
2911  CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
2912  Ops[j] =
2913  CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
2914  }
2915 
2916  Value *Result = CGF.EmitNeonCall(F, Ops, s);
2917  llvm::Type *ResultType = CGF.ConvertType(E->getType());
2918  if (ResultType->getPrimitiveSizeInBits() <
2919  Result->getType()->getPrimitiveSizeInBits())
2920  return CGF.Builder.CreateExtractElement(Result, C0);
2921 
2922  return CGF.Builder.CreateBitCast(Result, ResultType, s);
2923 }
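// In short, scalar (SISD) builtins are lowered by inserting each scalar
// operand into lane 0 of a vector, calling the vector intrinsic, and
// extracting lane 0 of the result when the builtin expects a narrower scalar.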
2924 
2925 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
2926  unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
2927  const char *NameHint, unsigned Modifier, const CallExpr *E,
2928  SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
2929  // Get the last argument, which specifies the vector type.
2930  llvm::APSInt NeonTypeConst;
2931  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
2932  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
2933  return nullptr;
2934 
2935  // Determine the type of this overloaded NEON intrinsic.
2936  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
2937  bool Usgn = Type.isUnsigned();
2938  bool Quad = Type.isQuad();
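  // NeonTypeFlags packs the element kind, signedness, and 64- vs 128-bit
  // ("quad") width of the overloaded vector type into a single integer.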
2939 
2940  llvm::VectorType *VTy = GetNeonType(this, Type);
2941  llvm::Type *Ty = VTy;
2942  if (!Ty)
2943  return nullptr;
2944 
2945  auto getAlignmentValue32 = [&](Address addr) -> Value* {
2946  return Builder.getInt32(addr.getAlignment().getQuantity());
2947  };
2948 
2949  unsigned Int = LLVMIntrinsic;
2950  if ((Modifier & UnsignedAlts) && !Usgn)
2951  Int = AltLLVMIntrinsic;
2952 
2953  switch (BuiltinID) {
2954  default: break;
2955  case NEON::BI__builtin_neon_vabs_v:
2956  case NEON::BI__builtin_neon_vabsq_v:
2957  if (VTy->getElementType()->isFloatingPointTy())
2958  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
2959  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
2960  case NEON::BI__builtin_neon_vaddhn_v: {
2961  llvm::VectorType *SrcTy =
2962  llvm::VectorType::getExtendedElementVectorType(VTy);
2963 
2964  // %sum = add <4 x i32> %lhs, %rhs
2965  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
2966  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
2967  Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
2968 
2969  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
2970  Constant *ShiftAmt =
2971  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
2972  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
2973 
2974  // %res = trunc <4 x i32> %high to <4 x i16>
2975  return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
2976  }
2977  case NEON::BI__builtin_neon_vcale_v:
2978  case NEON::BI__builtin_neon_vcaleq_v:
2979  case NEON::BI__builtin_neon_vcalt_v:
2980  case NEON::BI__builtin_neon_vcaltq_v:
2981  std::swap(Ops[0], Ops[1]);
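  // Fall through: after the swap, the "le"/"lt" absolute comparisons reuse
  // the "ge"/"gt" lowering below.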
2982  case NEON::BI__builtin_neon_vcage_v:
2983  case NEON::BI__builtin_neon_vcageq_v:
2984  case NEON::BI__builtin_neon_vcagt_v:
2985  case NEON::BI__builtin_neon_vcagtq_v: {
2986  llvm::Type *VecFlt = llvm::VectorType::get(
2987  VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
2988  VTy->getNumElements());
2989  llvm::Type *Tys[] = { VTy, VecFlt };
2990  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
2991  return EmitNeonCall(F, Ops, NameHint);
2992  }
2993  case NEON::BI__builtin_neon_vclz_v:
2994  case NEON::BI__builtin_neon_vclzq_v:
2995  // We generate a target-independent intrinsic, which needs a second argument
2996  // specifying whether or not clz of zero is undefined; on ARM it isn't.
2997  Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
2998  break;
2999  case NEON::BI__builtin_neon_vcvt_f32_v:
3000  case NEON::BI__builtin_neon_vcvtq_f32_v:
3001  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3002  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3003  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3004  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3005  case NEON::BI__builtin_neon_vcvt_n_f32_v:
3006  case NEON::BI__builtin_neon_vcvt_n_f64_v:
3007  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3008  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3009  llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3010  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3011  Function *F = CGM.getIntrinsic(Int, Tys);
3012  return EmitNeonCall(F, Ops, "vcvt_n");
3013  }
3014  case NEON::BI__builtin_neon_vcvt_n_s32_v:
3015  case NEON::BI__builtin_neon_vcvt_n_u32_v:
3016  case NEON::BI__builtin_neon_vcvt_n_s64_v:
3017  case NEON::BI__builtin_neon_vcvt_n_u64_v:
3018  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3019  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3020  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3021  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3022  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3023  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3024  return EmitNeonCall(F, Ops, "vcvt_n");
3025  }
3026  case NEON::BI__builtin_neon_vcvt_s32_v:
3027  case NEON::BI__builtin_neon_vcvt_u32_v:
3028  case NEON::BI__builtin_neon_vcvt_s64_v:
3029  case NEON::BI__builtin_neon_vcvt_u64_v:
3030  case NEON::BI__builtin_neon_vcvtq_s32_v:
3031  case NEON::BI__builtin_neon_vcvtq_u32_v:
3032  case NEON::BI__builtin_neon_vcvtq_s64_v:
3033  case NEON::BI__builtin_neon_vcvtq_u64_v: {
3034  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3035  return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3036  : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3037  }
3038  case NEON::BI__builtin_neon_vcvta_s32_v:
3039  case NEON::BI__builtin_neon_vcvta_s64_v:
3040  case NEON::BI__builtin_neon_vcvta_u32_v:
3041  case NEON::BI__builtin_neon_vcvta_u64_v:
3042  case NEON::BI__builtin_neon_vcvtaq_s32_v:
3043  case NEON::BI__builtin_neon_vcvtaq_s64_v:
3044  case NEON::BI__builtin_neon_vcvtaq_u32_v:
3045  case NEON::BI__builtin_neon_vcvtaq_u64_v:
3046  case NEON::BI__builtin_neon_vcvtn_s32_v:
3047  case NEON::BI__builtin_neon_vcvtn_s64_v:
3048  case NEON::BI__builtin_neon_vcvtn_u32_v:
3049  case NEON::BI__builtin_neon_vcvtn_u64_v:
3050  case NEON::BI__builtin_neon_vcvtnq_s32_v:
3051  case NEON::BI__builtin_neon_vcvtnq_s64_v:
3052  case NEON::BI__builtin_neon_vcvtnq_u32_v:
3053  case NEON::BI__builtin_neon_vcvtnq_u64_v:
3054  case NEON::BI__builtin_neon_vcvtp_s32_v:
3055  case NEON::BI__builtin_neon_vcvtp_s64_v:
3056  case NEON::BI__builtin_neon_vcvtp_u32_v:
3057  case NEON::BI__builtin_neon_vcvtp_u64_v:
3058  case NEON::BI__builtin_neon_vcvtpq_s32_v:
3059  case NEON::BI__builtin_neon_vcvtpq_s64_v:
3060  case NEON::BI__builtin_neon_vcvtpq_u32_v:
3061  case NEON::BI__builtin_neon_vcvtpq_u64_v:
3062  case NEON::BI__builtin_neon_vcvtm_s32_v:
3063  case NEON::BI__builtin_neon_vcvtm_s64_v:
3064  case NEON::BI__builtin_neon_vcvtm_u32_v:
3065  case NEON::BI__builtin_neon_vcvtm_u64_v:
3066  case NEON::BI__builtin_neon_vcvtmq_s32_v:
3067  case NEON::BI__builtin_neon_vcvtmq_s64_v:
3068  case NEON::BI__builtin_neon_vcvtmq_u32_v:
3069  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3070  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3071  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3072  }
3073  case NEON::BI__builtin_neon_vext_v:
3074  case NEON::BI__builtin_neon_vextq_v: {
3075  int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3076  SmallVector<Constant*, 16> Indices;
3077  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3078  Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
3079 
3080  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3081  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3082  Value *SV = llvm::ConstantVector::get(Indices);
3083  return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
3084  }
3085  case NEON::BI__builtin_neon_vfma_v:
3086  case NEON::BI__builtin_neon_vfmaq_v: {
3087  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3088  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3089  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3090  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3091 
3092  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3093  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3094  }
3095  case NEON::BI__builtin_neon_vld1_v:
3096  case NEON::BI__builtin_neon_vld1q_v: {
3097  llvm::Type *Tys[] = {Ty, Int8PtrTy};
3098  Ops.push_back(getAlignmentValue32(PtrOp0));
3099  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3100  }
3101  case NEON::BI__builtin_neon_vld2_v:
3102  case NEON::BI__builtin_neon_vld2q_v:
3103  case NEON::BI__builtin_neon_vld3_v:
3104  case NEON::BI__builtin_neon_vld3q_v:
3105  case NEON::BI__builtin_neon_vld4_v:
3106  case NEON::BI__builtin_neon_vld4q_v: {
3107  llvm::Type *Tys[] = {Ty, Int8PtrTy};
3108  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3109  Value *Align = getAlignmentValue32(PtrOp1);
3110  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3111  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3112  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3113  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3114  }
3115  case NEON::BI__builtin_neon_vld1_dup_v:
3116  case NEON::BI__builtin_neon_vld1q_dup_v: {
3117  Value *V = UndefValue::get(Ty);
3118  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3119  PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3120  LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3121  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3122  Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3123  return EmitNeonSplat(Ops[0], CI);
3124  }
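  // That is: load a single scalar, insert it into lane 0 of an undef vector,
  // then let EmitNeonSplat broadcast lane 0 to every lane.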
3125  case NEON::BI__builtin_neon_vld2_lane_v:
3126  case NEON::BI__builtin_neon_vld2q_lane_v:
3127  case NEON::BI__builtin_neon_vld3_lane_v:
3128  case NEON::BI__builtin_neon_vld3q_lane_v:
3129  case NEON::BI__builtin_neon_vld4_lane_v:
3130  case NEON::BI__builtin_neon_vld4q_lane_v: {
3131  llvm::Type *Tys[] = {Ty, Int8PtrTy};
3132  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3133  for (unsigned I = 2; I < Ops.size() - 1; ++I)
3134  Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3135  Ops.push_back(getAlignmentValue32(PtrOp1));
3136  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3137  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3138  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3139  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3140  }
3141  case NEON::BI__builtin_neon_vmovl_v: {
3142  llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
3143  Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
3144  if (Usgn)
3145  return Builder.CreateZExt(Ops[0], Ty, "vmovl");
3146  return Builder.CreateSExt(Ops[0], Ty, "vmovl");
3147  }
3148  case NEON::BI__builtin_neon_vmovn_v: {
3149  llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3150  Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
3151  return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
3152  }
3153  case NEON::BI__builtin_neon_vmull_v:
3154  // FIXME: the integer vmull operations could be emitted in terms of pure
3155  // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
3156  // hoisting the exts outside loops; until GlobalISel comes along and can
3157  // see through such movement, this leads to bad CodeGen. So we need an
3158  // intrinsic for now.
3159  Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
3160  Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
3161  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
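  // For example, an unsigned vmull selects llvm.arm.neon.vmullu, overloaded
  // on the widened result type Ty.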
3162  case NEON::BI__builtin_neon_vpadal_v:
3163  case NEON::BI__builtin_neon_vpadalq_v: {
3164  // The source operand type has twice as many elements of half the size.
3165  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3166  llvm::Type *EltTy =
3167  llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3168  llvm::Type *NarrowTy =
3169  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3170  llvm::Type *Tys[2] = { Ty, NarrowTy };
3171  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
3172  }
3173  case NEON::BI__builtin_neon_vpaddl_v:
3174  case NEON::BI__builtin_neon_vpaddlq_v: {
3175  // The source operand type has twice as many elements of half the size.
3176  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
3177  llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
3178  llvm::Type *NarrowTy =
3179  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
3180  llvm::Type *Tys[2] = { Ty, NarrowTy };
3181  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
3182  }
3183  case NEON::BI__builtin_neon_vqdmlal_v:
3184  case NEON::BI__builtin_neon_vqdmlsl_v: {
3185  SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
3186  Ops[1] =
3187  EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
3188  Ops.resize(2);
3189  return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
3190  }
3191  case NEON::BI__builtin_neon_vqshl_n_v:
3192  case NEON::BI__builtin_neon_vqshlq_n_v:
3193  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
3194  1, false);
3195  case NEON::BI__builtin_neon_vqshlu_n_v:
3196  case NEON::BI__builtin_neon_vqshluq_n_v:
3197  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
3198  1, false);
3199  case NEON::BI__builtin_neon_vrecpe_v:
3200  case NEON::BI__builtin_neon_vrecpeq_v:
3201  case NEON::BI__builtin_neon_vrsqrte_v:
3202  case NEON::BI__builtin_neon_vrsqrteq_v:
3203  Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
3204  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
3205 
3206  case NEON::BI__builtin_neon_vrshr_n_v:
3207  case NEON::BI__builtin_neon_vrshrq_n_v:
3208  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
3209  1, true);
3210  case NEON::BI__builtin_neon_vshl_n_v:
3211  case NEON::BI__builtin_neon_vshlq_n_v:
3212  Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
3213  return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
3214  "vshl_n");
3215  case NEON::BI__builtin_neon_vshll_n_v: {
3216  llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
3217  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3218  if (Usgn)
3219  Ops[0] = Builder.CreateZExt(Ops[0], VTy);
3220  else
3221  Ops[0] = Builder.CreateSExt(Ops[0], VTy);
3222  Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
3223  return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
3224  }
3225  case NEON::BI__builtin_neon_vshrn_n_v: {
3226  llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
3227  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3228  Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
3229  if (Usgn)
3230  Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
3231  else
3232  Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
3233  return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
3234  }
3235  case NEON::BI__builtin_neon_vshr_n_v:
3236  case NEON::BI__builtin_neon_vshrq_n_v:
3237  return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
3238  case NEON::BI__builtin_neon_vst1_v:
3239  case NEON::BI__builtin_neon_vst1q_v:
3240  case NEON::BI__builtin_neon_vst2_v:
3241  case NEON::BI__builtin_neon_vst2q_v:
3242  case NEON::BI__builtin_neon_vst3_v:
3243  case NEON::BI__builtin_neon_vst3q_v:
3244  case NEON::BI__builtin_neon_vst4_v:
3245  case NEON::BI__builtin_neon_vst4q_v:
3246  case NEON::BI__builtin_neon_vst2_lane_v:
3247  case NEON::BI__builtin_neon_vst2q_lane_v:
3248  case NEON::BI__builtin_neon_vst3_lane_v:
3249  case NEON::BI__builtin_neon_vst3q_lane_v:
3250  case NEON::BI__builtin_neon_vst4_lane_v:
3251  case NEON::BI__builtin_neon_vst4q_lane_v: {
3252  llvm::Type *Tys[] = {Int8PtrTy, Ty};
3253  Ops.push_back(getAlignmentValue32(PtrOp0));
3254  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
3255  }
3256  case NEON::BI__builtin_neon_vsubhn_v: {
3257  llvm::VectorType *SrcTy =
3258  llvm::VectorType::getExtendedElementVectorType(VTy);
3259 
3260  // %diff = sub <4 x i32> %lhs, %rhs
3261  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3262  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3263  Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
3264 
3265  // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
3266  Constant *ShiftAmt =
3267  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3268  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
3269 
3270  // %res = trunc <4 x i32> %high to <4 x i16>
3271  return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
3272  }
3273  case NEON::BI__builtin_neon_vtrn_v:
3274  case NEON::BI__builtin_neon_vtrnq_v: {
3275  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3276  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3277  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3278  Value *SV = nullptr;
3279 
3280  for (unsigned vi = 0; vi != 2; ++vi) {
3281  SmallVector<Constant*, 16> Indices;
3282  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3283  Indices.push_back(Builder.getInt32(i+vi));
3284  Indices.push_back(Builder.getInt32(i+e+vi));
3285  }
3286  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3287  SV = llvm::ConstantVector::get(Indices);
3288  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
3289  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3290  }
3291  return SV;
3292  }
3293  case NEON::BI__builtin_neon_vtst_v:
3294  case NEON::BI__builtin_neon_vtstq_v: {
3295  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3296  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3297  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
3298  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
3299  ConstantAggregateZero::get(Ty));
3300  return Builder.CreateSExt(Ops[0], Ty, "vtst");
3301  }
3302  case NEON::BI__builtin_neon_vuzp_v:
3303  case NEON::BI__builtin_neon_vuzpq_v: {
3304  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3305  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3306  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3307  Value *SV = nullptr;
3308 
3309  for (unsigned vi = 0; vi != 2; ++vi) {
3310  SmallVector<Constant*, 16> Indices;
3311  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3312  Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
3313 
3314  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3315  SV = llvm::ConstantVector::get(Indices);
3316  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
3317  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3318  }
3319  return SV;
3320  }
3321  case NEON::BI__builtin_neon_vzip_v:
3322  case NEON::BI__builtin_neon_vzipq_v: {
3323  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
3324  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3325  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3326  Value *SV = nullptr;
3327 
3328  for (unsigned vi = 0; vi != 2; ++vi) {
3329  SmallVector<Constant*, 16> Indices;
3330  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
3331  Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
3332  Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
3333  }
3334  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
3335  SV = llvm::ConstantVector::get(Indices);
3336  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
3337  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
3338  }
3339  return SV;
3340  }
3341  }
3342 
3343  assert(Int && "Expected valid intrinsic number");
3344 
3345  // Determine the type(s) of this overloaded AArch64 intrinsic.
3346  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
3347 
3348  Value *Result = EmitNeonCall(F, Ops, NameHint);
3349  llvm::Type *ResultType = ConvertType(E->getType());
3350  // Cast the AArch64 intrinsic's one-element vector result back to the
3351  // scalar type the builtin expects.
3352  return Builder.CreateBitCast(Result, ResultType, NameHint);
3353 }
3354 
3356  Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
3357  const CmpInst::Predicate Ip, const Twine &Name) {
3358  llvm::Type *OTy = Op->getType();
3359 
3360  // FIXME: this is utterly horrific. We should not be looking at previous
3361  // codegen context to find out what needs doing. Unfortunately TableGen
3362  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
3363  // (etc).
3364  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
3365  OTy = BI->getOperand(0)->getType();
3366 
3367  Op = Builder.CreateBitCast(Op, OTy);
3368  if (OTy->getScalarType()->isFloatingPointTy()) {
3369  Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
3370  } else {
3371  Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
3372  }
3373  return Builder.CreateSExt(Op, Ty, Name);
3374 }
3375 
3377  Value *ExtOp, Value *IndexOp,
3378  llvm::Type *ResTy, unsigned IntID,
3379  const char *Name) {
3380  SmallVector<Value *, 2> TblOps;
3381  if (ExtOp)
3382  TblOps.push_back(ExtOp);
3383 
3384  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
3385  SmallVector<Constant*, 16> Indices;
3386  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
3387  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
3388  Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i));
3389  Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1));
3390  }
3391  Value *SV = llvm::ConstantVector::get(Indices);
3392 
3393  int PairPos = 0, End = Ops.size() - 1;
3394  while (PairPos < End) {
3395  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3396  Ops[PairPos+1], SV, Name));
3397  PairPos += 2;
3398  }
3399 
3400  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
3401  // of the last 128-bit lookup table with zeros.
3402  if (PairPos == End) {
3403  Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
3404  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3405  ZeroTbl, SV, Name));
3406  }
3407 
3408  Function *TblF;
3409  TblOps.push_back(IndexOp);
3410  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
3411 
3412  return CGF.EmitNeonCall(TblF, TblOps, Name);
3413 }
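// For example, a lookup over three 64-bit tables is repacked into two 128-bit
// tables (the high half of the last one zero-filled) before the TBL/TBX
// intrinsic is called with the index operand appended last.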
3414 
3415 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
3416  unsigned Value;
3417  switch (BuiltinID) {
3418  default:
3419  return nullptr;
3420  case ARM::BI__builtin_arm_nop:
3421  Value = 0;
3422  break;
3423  case ARM::BI__builtin_arm_yield:
3424  case ARM::BI__yield:
3425  Value = 1;
3426  break;
3427  case ARM::BI__builtin_arm_wfe:
3428  case ARM::BI__wfe:
3429  Value = 2;
3430  break;
3431  case ARM::BI__builtin_arm_wfi:
3432  case ARM::BI__wfi:
3433  Value = 3;
3434  break;
3435  case ARM::BI__builtin_arm_sev:
3436  case ARM::BI__sev:
3437  Value = 4;
3438  break;
3439  case ARM::BI__builtin_arm_sevl:
3440  case ARM::BI__sevl:
3441  Value = 5;
3442  break;
3443  }
3444 
3445  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
3446  llvm::ConstantInt::get(Int32Ty, Value));
3447 }
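// For example, __wfi() lowers to "call void @llvm.arm.hint(i32 3)".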
3448 
3449 // Generates the IR for the read/write special register builtin.
3450 // ValueType is the type of the value that is to be written or read;
3451 // RegisterType is the type of the register being written to or read from.
3452 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
3453  const CallExpr *E,
3454  llvm::Type *RegisterType,
3455  llvm::Type *ValueType, bool IsRead) {
3456  // The read and write register intrinsics only support 32- and 64-bit operations.
3457  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
3458  && "Unsupported size for register.");
3459 
3460  CodeGen::CGBuilderTy &Builder = CGF.Builder;
3461  CodeGen::CodeGenModule &CGM = CGF.CGM;
3462  LLVMContext &Context = CGM.getLLVMContext();
3463 
3464  const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
3465  StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
3466 
3467  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
3468  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
3469  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
3470 
3471  llvm::Type *Types[] = { RegisterType };
3472 
3473  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
3474  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
3475  && "Can't fit 64-bit value in 32-bit register");
3476 
3477  if (IsRead) {
3478  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
3479  llvm::Value *Call = Builder.CreateCall(F, Metadata);
3480 
3481  if (MixedTypes)
3482  // Read into 64 bit register and then truncate result to 32 bit.
3483  return Builder.CreateTrunc(Call, ValueType);
3484 
3485  if (ValueType->isPointerTy())
3486  // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
3487  return Builder.CreateIntToPtr(Call, ValueType);
3488 
3489  return Call;
3490  }
3491 
3492  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
3493  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
3494  if (MixedTypes) {
3495  // Extend 32 bit write value to 64 bit to pass to write.
3496  ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
3497  return Builder.CreateCall(F, { Metadata, ArgValue });
3498  }
3499 
3500  if (ValueType->isPointerTy()) {
3501  // Have VoidPtrTy ArgValue but want to return an i32/i64.
3502  ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
3503  return Builder.CreateCall(F, { Metadata, ArgValue });
3504  }
3505 
3506  return Builder.CreateCall(F, { Metadata, ArgValue });
3507 }
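// For example, __builtin_arm_rsr("regname") becomes a call to
// llvm.read_register.i32 whose metadata operand names the register; the wsr
// forms call llvm.write_register instead.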
3508 
3509 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
3510 /// argument that specifies the vector type.
3511 static bool HasExtraNeonArgument(unsigned BuiltinID) {
3512  switch (BuiltinID) {
3513  default: break;
3514  case NEON::BI__builtin_neon_vget_lane_i8:
3515  case NEON::BI__builtin_neon_vget_lane_i16:
3516  case NEON::BI__builtin_neon_vget_lane_i32:
3517  case NEON::BI__builtin_neon_vget_lane_i64:
3518  case NEON::BI__builtin_neon_vget_lane_f32:
3519  case NEON::BI__builtin_neon_vgetq_lane_i8:
3520  case NEON::BI__builtin_neon_vgetq_lane_i16:
3521  case NEON::BI__builtin_neon_vgetq_lane_i32:
3522  case NEON::BI__builtin_neon_vgetq_lane_i64:
3523  case NEON::BI__builtin_neon_vgetq_lane_f32:
3524  case NEON::BI__builtin_neon_vset_lane_i8:
3525  case NEON::BI__builtin_neon_vset_lane_i16:
3526  case NEON::BI__builtin_neon_vset_lane_i32:
3527  case NEON::BI__builtin_neon_vset_lane_i64:
3528  case NEON::BI__builtin_neon_vset_lane_f32:
3529  case NEON::BI__builtin_neon_vsetq_lane_i8:
3530  case NEON::BI__builtin_neon_vsetq_lane_i16:
3531  case NEON::BI__builtin_neon_vsetq_lane_i32:
3532  case NEON::BI__builtin_neon_vsetq_lane_i64:
3533  case NEON::BI__builtin_neon_vsetq_lane_f32:
3534  case NEON::BI__builtin_neon_vsha1h_u32:
3535  case NEON::BI__builtin_neon_vsha1cq_u32:
3536  case NEON::BI__builtin_neon_vsha1pq_u32:
3537  case NEON::BI__builtin_neon_vsha1mq_u32:
3538  case ARM::BI_MoveToCoprocessor:
3539  case ARM::BI_MoveToCoprocessor2:
3540  return false;
3541  }
3542  return true;
3543 }
3544 
3545 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
3546  const CallExpr *E) {
3547  if (auto Hint = GetValueForARMHint(BuiltinID))
3548  return Hint;
3549 
3550  if (BuiltinID == ARM::BI__emit) {
3551  bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
3552  llvm::FunctionType *FTy =
3553  llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
3554 
3555  APSInt Value;
3556  if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
3557  llvm_unreachable("Sema will ensure that the parameter is constant");
3558 
3559  uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
3560 
3561  llvm::InlineAsm *Emit =
3562  IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
3563  /*SideEffects=*/true)
3564  : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
3565  /*SideEffects=*/true);
3566 
3567  return Builder.CreateCall(Emit);
3568  }
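  // For example, __emit(0xbf00) in Thumb mode produces the side-effecting
  // inline asm ".inst.n 0xbf00" (the Thumb NOP encoding).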
3569 
3570  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
3571  Value *Option = EmitScalarExpr(E->getArg(0));
3572  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
3573  }
3574 
3575  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
3576  Value *Address = EmitScalarExpr(E->getArg(0));
3577  Value *RW = EmitScalarExpr(E->getArg(1));
3578  Value *IsData = EmitScalarExpr(E->getArg(2));
3579 
3580  // Locality is not supported on the ARM target.
3581  Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
3582 
3583  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
3584  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
3585  }
3586 
3587  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
3588  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
3589  EmitScalarExpr(E->getArg(0)),
3590  "rbit");
3591  }
3592 
3593  if (BuiltinID == ARM::BI__clear_cache) {
3594  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
3595  const FunctionDecl *FD = E->getDirectCallee();
3596  Value *Ops[2];
3597  for (unsigned i = 0; i < 2; i++)
3598  Ops[i] = EmitScalarExpr(E->getArg(i));
3599  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
3600  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
3601  StringRef Name = FD->getName();
3602  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
3603  }
3604 
3605  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
3606  ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
3607  BuiltinID == ARM::BI__builtin_arm_ldaex) &&
3608  getContext().getTypeSize(E->getType()) == 64) ||
3609  BuiltinID == ARM::BI__ldrexd) {
3610  Function *F;
3611 
3612  switch (BuiltinID) {
3613  default: llvm_unreachable("unexpected builtin");
3614  case ARM::BI__builtin_arm_ldaex:
3615  F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
3616  break;
3617  case ARM::BI__builtin_arm_ldrexd:
3618  case ARM::BI__builtin_arm_ldrex:
3619  case ARM::BI__ldrexd:
3620  F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
3621  break;
3622  }
3623 
3624  Value *LdPtr = EmitScalarExpr(E->getArg(0));
3625  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
3626  "ldrexd");
3627 
3628  Value *Val0 = Builder.CreateExtractValue(Val, 1);
3629  Value *Val1 = Builder.CreateExtractValue(Val, 0);
3630  Val0 = Builder.CreateZExt(Val0, Int64Ty);
3631  Val1 = Builder.CreateZExt(Val1, Int64Ty);
3632 
3633  Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
3634  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
3635  Val = Builder.CreateOr(Val, Val1);
3636  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
3637  }
3638 
3639  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
3640  BuiltinID == ARM::BI__builtin_arm_ldaex) {
3641  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
3642 
3643  QualType Ty = E->getType();
3644  llvm::Type *RealResTy = ConvertType(Ty);
3645  llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
3646  getContext().getTypeSize(Ty));
3647  LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
3648 
3649  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
3650  ? Intrinsic::arm_ldaex
3651  : Intrinsic::arm_ldrex,
3652  LoadAddr->getType());
3653  Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
3654 
3655  if (RealResTy->isPointerTy())
3656  return Builder.CreateIntToPtr(Val, RealResTy);
3657  else {
3658  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
3659  return Builder.CreateBitCast(Val, RealResTy);
3660  }
3661  }
3662 
3663  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
3664  ((BuiltinID == ARM::BI__builtin_arm_stlex ||
3665  BuiltinID == ARM::BI__builtin_arm_strex) &&
3666  getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
3667  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
3668  ? Intrinsic::arm_stlexd
3669  : Intrinsic::arm_strexd);
3670  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
3671 
3672  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
3673  Value *Val = EmitScalarExpr(E->getArg(0));
3674  Builder.CreateStore(Val, Tmp);
3675 
3676  Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
3677  Val = Builder.CreateLoad(LdPtr);
3678 
3679  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
3680  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
3681  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
3682  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
3683  }
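  // The 64-bit value is spilled to a temporary, reloaded as a pair of i32s,
  // and handed to the strexd/stlexd intrinsic as two separate registers.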
3684 
3685  if (BuiltinID == ARM::BI__builtin_arm_strex ||
3686  BuiltinID == ARM::BI__builtin_arm_stlex) {
3687  Value *StoreVal = EmitScalarExpr(E->getArg(0));
3688  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
3689 
3690  QualType Ty = E->getArg(0)->getType();
3691  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
3692  getContext().getTypeSize(Ty));
3693  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
3694 
3695  if (StoreVal->getType()->isPointerTy())
3696  StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
3697  else {
3698  StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
3699  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
3700  }
3701 
3702  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
3703  ? Intrinsic::arm_stlex
3704  : Intrinsic::arm_strex,
3705  StoreAddr->getType());
3706  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
3707  }
3708 
3709  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
3710  Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
3711  return Builder.CreateCall(F);
3712  }
3713 
3714  // CRC32
3715  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
3716  switch (BuiltinID) {
3717  case ARM::BI__builtin_arm_crc32b:
3718  CRCIntrinsicID = Intrinsic::arm_crc32b; break;
3719  case ARM::BI__builtin_arm_crc32cb:
3720  CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
3721  case ARM::BI__builtin_arm_crc32h:
3722  CRCIntrinsicID = Intrinsic::arm_crc32h; break;
3723  case ARM::BI__builtin_arm_crc32ch:
3724  CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
3725  case ARM::BI__builtin_arm_crc32w:
3726  case ARM::BI__builtin_arm_crc32d:
3727  CRCIntrinsicID = Intrinsic::arm_crc32w; break;
3728  case ARM::BI__builtin_arm_crc32cw:
3729  case ARM::BI__builtin_arm_crc32cd:
3730  CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
3731  }
3732 
3733  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
3734  Value *Arg0 = EmitScalarExpr(E->getArg(0));
3735  Value *Arg1 = EmitScalarExpr(E->getArg(1));
3736 
3737  // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
3738  // intrinsics, hence we need different codegen for these cases.
3739  if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
3740  BuiltinID == ARM::BI__builtin_arm_crc32cd) {
3741  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
3742  Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
3743  Value *Arg1b = Builder.CreateLShr(Arg1, C1);
3744  Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
3745 
3746  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3747  Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
3748  return Builder.CreateCall(F, {Res, Arg1b});
3749  } else {
3750  Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
3751 
3752  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
3753  return Builder.CreateCall(F, {Arg0, Arg1});
3754  }
3755  }
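  // That is, crc32d(acc, x) is computed as
  // crc32w(crc32w(acc, lo32(x)), hi32(x)), and likewise for crc32cd.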
3756 
3757  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
3758  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3759  BuiltinID == ARM::BI__builtin_arm_rsrp ||
3760  BuiltinID == ARM::BI__builtin_arm_wsr ||
3761  BuiltinID == ARM::BI__builtin_arm_wsr64 ||
3762  BuiltinID == ARM::BI__builtin_arm_wsrp) {
3763 
3764  bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
3765  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3766  BuiltinID == ARM::BI__builtin_arm_rsrp;
3767 
3768  bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
3769  BuiltinID == ARM::BI__builtin_arm_wsrp;
3770 
3771  bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
3772  BuiltinID == ARM::BI__builtin_arm_wsr64;
3773 
3774  llvm::Type *ValueType;
3775  llvm::Type *RegisterType;
3776  if (IsPointerBuiltin) {
3777  ValueType = VoidPtrTy;
3778  RegisterType = Int32Ty;
3779  } else if (Is64Bit) {
3780  ValueType = RegisterType = Int64Ty;
3781  } else {
3782  ValueType = RegisterType = Int32Ty;
3783  }
3784 
3785  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
3786  }
3787 
3788  // Find out if any arguments are required to be integer constant
3789  // expressions.
3790  unsigned ICEArguments = 0;
3791  ASTContext::GetBuiltinTypeError Error;
3792  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
3793  assert(Error == ASTContext::GE_None && "Should not codegen an error");
3794 
3795  auto getAlignmentValue32 = [&](Address addr) -> Value* {
3796  return Builder.getInt32(addr.getAlignment().getQuantity());
3797  };
3798 
3799  Address PtrOp0 = Address::invalid();
3800  Address PtrOp1 = Address::invalid();
3801  SmallVector<Value*, 4> Ops;
3802  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
3803  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
3804  for (unsigned i = 0, e = NumArgs; i != e; i++) {
3805  if (i == 0) {
3806  switch (BuiltinID) {
3807  case NEON::BI__builtin_neon_vld1_v:
3808  case NEON::BI__builtin_neon_vld1q_v:
3809  case NEON::BI__builtin_neon_vld1q_lane_v:
3810  case NEON::BI__builtin_neon_vld1_lane_v:
3811  case NEON::BI__builtin_neon_vld1_dup_v:
3812  case NEON::BI__builtin_neon_vld1q_dup_v:
3813  case NEON::BI__builtin_neon_vst1_v:
3814  case NEON::BI__builtin_neon_vst1q_v:
3815  case NEON::BI__builtin_neon_vst1q_lane_v:
3816  case NEON::BI__builtin_neon_vst1_lane_v:
3817  case NEON::BI__builtin_neon_vst2_v:
3818  case NEON::BI__builtin_neon_vst2q_v:
3819  case NEON::BI__builtin_neon_vst2_lane_v:
3820  case NEON::BI__builtin_neon_vst2q_lane_v:
3821  case NEON::BI__builtin_neon_vst3_v:
3822  case NEON::BI__builtin_neon_vst3q_v:
3823  case NEON::BI__builtin_neon_vst3_lane_v:
3824  case NEON::BI__builtin_neon_vst3q_lane_v:
3825  case NEON::BI__builtin_neon_vst4_v:
3826  case NEON::BI__builtin_neon_vst4q_v:
3827  case NEON::BI__builtin_neon_vst4_lane_v:
3828  case NEON::BI__builtin_neon_vst4q_lane_v:
3829  // Get the alignment for the argument in addition to the value;
3830  // we'll use it later.
3831  PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
3832  Ops.push_back(PtrOp0.getPointer());
3833  continue;
3834  }
3835  }
3836  if (i == 1) {
3837  switch (BuiltinID) {
3838  case NEON::BI__builtin_neon_vld2_v:
3839  case NEON::BI__builtin_neon_vld2q_v:
3840  case NEON::BI__builtin_neon_vld3_v:
3841  case NEON::BI__builtin_neon_vld3q_v:
3842  case NEON::BI__builtin_neon_vld4_v:
3843  case NEON::BI__builtin_neon_vld4q_v:
3844  case NEON::BI__builtin_neon_vld2_lane_v:
3845  case NEON::BI__builtin_neon_vld2q_lane_v:
3846  case NEON::BI__builtin_neon_vld3_lane_v:
3847  case NEON::BI__builtin_neon_vld3q_lane_v:
3848  case NEON::BI__builtin_neon_vld4_lane_v:
3849  case NEON::BI__builtin_neon_vld4q_lane_v:
3850  case NEON::BI__builtin_neon_vld2_dup_v:
3851  case NEON::BI__builtin_neon_vld3_dup_v:
3852  case NEON::BI__builtin_neon_vld4_dup_v:
3853  // Get the alignment for the argument in addition to the value;
3854  // we'll use it later.
3855  PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
3856  Ops.push_back(PtrOp1.getPointer());
3857  continue;
3858  }
3859  }
3860 
3861  if ((ICEArguments & (1 << i)) == 0) {
3862  Ops.push_back(EmitScalarExpr(E->getArg(i)));
3863  } else {
3864  // If this is required to be a constant, constant fold it so that we know
3865  // that the generated intrinsic gets a ConstantInt.
3866  llvm::APSInt Result;
3867  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
3868  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
3869  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
3870  }
3871  }
3872 
3873  switch (BuiltinID) {
3874  default: break;
3875 
3876  case NEON::BI__builtin_neon_vget_lane_i8:
3877  case NEON::BI__builtin_neon_vget_lane_i16:
3878  case NEON::BI__builtin_neon_vget_lane_i32:
3879  case NEON::BI__builtin_neon_vget_lane_i64:
3880  case NEON::BI__builtin_neon_vget_lane_f32:
3881  case NEON::BI__builtin_neon_vgetq_lane_i8:
3882  case NEON::BI__builtin_neon_vgetq_lane_i16:
3883  case NEON::BI__builtin_neon_vgetq_lane_i32:
3884  case NEON::BI__builtin_neon_vgetq_lane_i64:
3885  case NEON::BI__builtin_neon_vgetq_lane_f32:
3886  return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
3887 
3888  case NEON::BI__builtin_neon_vset_lane_i8:
3889  case NEON::BI__builtin_neon_vset_lane_i16:
3890  case NEON::BI__builtin_neon_vset_lane_i32:
3891  case NEON::BI__builtin_neon_vset_lane_i64:
3892  case NEON::BI__builtin_neon_vset_lane_f32:
3893  case NEON::BI__builtin_neon_vsetq_lane_i8:
3894  case NEON::BI__builtin_neon_vsetq_lane_i16:
3895  case NEON::BI__builtin_neon_vsetq_lane_i32:
3896  case NEON::BI__builtin_neon_vsetq_lane_i64:
3897  case NEON::BI__builtin_neon_vsetq_lane_f32:
3898  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
3899 
3900  case NEON::BI__builtin_neon_vsha1h_u32:
3901  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
3902  "vsha1h");
3903  case NEON::BI__builtin_neon_vsha1cq_u32:
3904  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
3905  "vsha1c");
3906  case NEON::BI__builtin_neon_vsha1pq_u32:
3907  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
3908  "vsha1p");
3909  case NEON::BI__builtin_neon_vsha1mq_u32:
3910  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
3911  "vsha1m");
3912 
3913  // The ARM _MoveToCoprocessor builtins put the input register value as
3914  // the first argument, but the LLVM intrinsic expects it as the third one.
3915  case ARM::BI_MoveToCoprocessor:
3916  case ARM::BI_MoveToCoprocessor2: {
3917  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
3918  Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
3919  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
3920  Ops[3], Ops[4], Ops[5]});
3921  }
3922  }
3923 
3924  // Get the last argument, which specifies the vector type.
3925  assert(HasExtraArg);
3926  llvm::APSInt Result;
3927  const Expr *Arg = E->getArg(E->getNumArgs()-1);
3928  if (!Arg->isIntegerConstantExpr(Result, getContext()))
3929  return nullptr;
3930 
3931  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
3932  BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
3933  // Determine the overloaded type of this builtin.
3934  llvm::Type *Ty;
3935  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
3936  Ty = FloatTy;
3937  else
3938  Ty = DoubleTy;
3939 
3940  // Determine whether this is an unsigned conversion or not.
3941  bool usgn = Result.getZExtValue() == 1;
3942  unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
3943 
3944  // Call the appropriate intrinsic.
3945  Function *F = CGM.getIntrinsic(Int, Ty);
3946  return Builder.CreateCall(F, Ops, "vcvtr");
3947  }
3948 
3949  // Determine the type of this overloaded NEON intrinsic.
3950  NeonTypeFlags Type(Result.getZExtValue());
3951  bool usgn = Type.isUnsigned();
3952  bool rightShift = false;
3953 
3954  llvm::VectorType *VTy = GetNeonType(this, Type);
3955  llvm::Type *Ty = VTy;
3956  if (!Ty)
3957  return nullptr;
3958 
3959  // Many NEON builtins have identical semantics and uses in ARM and
3960  // AArch64. Emit these in a single function.
3961  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
3962  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
3963  IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
3964  if (Builtin)
3965  return EmitCommonNeonBuiltinExpr(
3966  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
3967  Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
3968 
3969  unsigned Int;
3970  switch (BuiltinID) {
3971  default: return nullptr;
3972  case NEON::BI__builtin_neon_vld1q_lane_v:
3973  // Handle 64-bit integer elements as a special case. Use shuffles of
3974  // one-element vectors to avoid poor code for i64 in the backend.
3975  if (VTy->getElementType()->isIntegerTy(64)) {
3976  // Extract the other lane.
3977  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3978  uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
3979  Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
3980  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
3981  // Load the value as a one-element vector.
3982  Ty = llvm::VectorType::get(VTy->getElementType(), 1);
3983  llvm::Type *Tys[] = {Ty, Int8PtrTy};
3984  Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
3985  Value *Align = getAlignmentValue32(PtrOp0);
3986  Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
3987  // Combine them.
3988  uint32_t Indices[] = {1 - Lane, Lane};
3989  SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
3990  return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
3991  }
3992  // fall through
3993  case NEON::BI__builtin_neon_vld1_lane_v: {
3994  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3995  PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
3996  Value *Ld = Builder.CreateLoad(PtrOp0);
3997  return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
3998  }
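  // Sketch of the i64 special case above, for vld1q_lane with Lane == 1:
  //   %keep = shufflevector <2 x i64> %v, <2 x i64> %v, <1 x i32> <i32 0>
  //   %ld   = call <1 x i64> @llvm.arm.neon.vld1(i8* %p, i32 %align)
  //   %res  = shufflevector <1 x i64> %keep, <1 x i64> %ld,
  //                         <2 x i32> <i32 0, i32 1>
  // The two one-element shuffles keep the untouched lane and merge in the
  // loaded value without going through a scalar i64 insertelement.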
3999  case NEON::BI__builtin_neon_vld2_dup_v:
4000  case NEON::BI__builtin_neon_vld3_dup_v:
4001  case NEON::BI__builtin_neon_vld4_dup_v: {
4002  // Handle 64-bit elements as a special case; no "dup" is needed.
4003  if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4004  switch (BuiltinID) {
4005  case NEON::BI__builtin_neon_vld2_dup_v:
4006  Int = Intrinsic::arm_neon_vld2;
4007  break;
4008  case NEON::BI__builtin_neon_vld3_dup_v:
4009  Int = Intrinsic::arm_neon_vld3;
4010  break;
4011  case NEON::BI__builtin_neon_vld4_dup_v:
4012  Int = Intrinsic::arm_neon_vld4;
4013  break;
4014  default: llvm_unreachable("unknown vld_dup intrinsic?");
4015  }
4016  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4017  Function *F = CGM.getIntrinsic(Int, Tys);
4018  llvm::Value *Align = getAlignmentValue32(PtrOp1);
4019  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
4020  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4021  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4022  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4023  }
4024  switch (BuiltinID) {
4025  case NEON::BI__builtin_neon_vld2_dup_v:
4026  Int = Intrinsic::arm_neon_vld2lane;
4027  break;
4028  case NEON::BI__builtin_neon_vld3_dup_v:
4029  Int = Intrinsic::arm_neon_vld3lane;
4030  break;
4031  case NEON::BI__builtin_neon_vld4_dup_v:
4032  Int = Intrinsic::arm_neon_vld4lane;
4033  break;
4034  default: llvm_unreachable("unknown vld_dup intrinsic?");
4035  }
4036  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4037  Function *F = CGM.getIntrinsic(Int, Tys);
4038  llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
4039 
4041  Args.push_back(Ops[1]);
4042  Args.append(STy->getNumElements(), UndefValue::get(Ty));
4043 
4044  llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
4045  Args.push_back(CI);
4046  Args.push_back(getAlignmentValue32(PtrOp1));
4047 
4048  Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
4049  // Splat lane 0 to all elements in each vector of the result.
4050  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4051  Value *Val = Builder.CreateExtractValue(Ops[1], i);
4052  Value *Elt = Builder.CreateBitCast(Val, Ty);
4053  Elt = EmitNeonSplat(Elt, CI);
4054  Elt = Builder.CreateBitCast(Elt, Val->getType());
4055  Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
4056  }
4057  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4058  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4059  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4060  }
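  // Rough shape of the non-64-bit path above, for vld2_dup: a vld2lane
  // load into lane 0 of undef vectors, followed by a per-vector splat:
  //   {v0, v1} = llvm.arm.neon.vld2lane(ptr, undef, undef, /*lane*/0, align)
  //   result   = { splat(v0[0]), splat(v1[0]) }
  // EmitNeonSplat realizes each splat as a shufflevector whose mask is,
  // in this case, all zeros.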
4061  case NEON::BI__builtin_neon_vqrshrn_n_v:
4062  Int =
4063  usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
4064  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
4065  1, true);
4066  case NEON::BI__builtin_neon_vqrshrun_n_v:
4067  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
4068  Ops, "vqrshrun_n", 1, true);
4069  case NEON::BI__builtin_neon_vqshrn_n_v:
4070  Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
4071  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
4072  1, true);
4073  case NEON::BI__builtin_neon_vqshrun_n_v:
4074  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
4075  Ops, "vqshrun_n", 1, true);
4076  case NEON::BI__builtin_neon_vrecpe_v:
4077  case NEON::BI__builtin_neon_vrecpeq_v:
4078  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
4079  Ops, "vrecpe");
4080  case NEON::BI__builtin_neon_vrshrn_n_v:
4081  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
4082  Ops, "vrshrn_n", 1, true);
4083  case NEON::BI__builtin_neon_vrsra_n_v:
4084  case NEON::BI__builtin_neon_vrsraq_n_v:
4085  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4086  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4087  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
4088  Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
4089  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
4090  return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
4091  case NEON::BI__builtin_neon_vsri_n_v:
4092  case NEON::BI__builtin_neon_vsriq_n_v:
4093  rightShift = true;
4094  case NEON::BI__builtin_neon_vsli_n_v:
4095  case NEON::BI__builtin_neon_vsliq_n_v:
4096  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
4097  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
4098  Ops, "vsli_n");
4099  case NEON::BI__builtin_neon_vsra_n_v:
4100  case NEON::BI__builtin_neon_vsraq_n_v:
4101  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4102  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
4103  return Builder.CreateAdd(Ops[0], Ops[1]);
4104  case NEON::BI__builtin_neon_vst1q_lane_v:
4105  // Handle 64-bit integer elements as a special case. Use a shuffle to get
4106  // a one-element vector and avoid poor code for i64 in the backend.
4107  if (VTy->getElementType()->isIntegerTy(64)) {
4108  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4109  Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
4110  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4111  Ops[2] = getAlignmentValue32(PtrOp0);
4112  llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
4113  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
4114  Tys), Ops);
4115  }
4116  // fall through
4117  case NEON::BI__builtin_neon_vst1_lane_v: {
4118  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4119  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
4120  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4121  auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
4122  return St;
4123  }
4124  case NEON::BI__builtin_neon_vtbl1_v:
4125  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
4126  Ops, "vtbl1");
4127  case NEON::BI__builtin_neon_vtbl2_v:
4128  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
4129  Ops, "vtbl2");
4130  case NEON::BI__builtin_neon_vtbl3_v:
4131  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
4132  Ops, "vtbl3");
4133  case NEON::BI__builtin_neon_vtbl4_v:
4134  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
4135  Ops, "vtbl4");
4136  case NEON::BI__builtin_neon_vtbx1_v:
4137  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
4138  Ops, "vtbx1");
4139  case NEON::BI__builtin_neon_vtbx2_v:
4140  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
4141  Ops, "vtbx2");
4142  case NEON::BI__builtin_neon_vtbx3_v:
4143  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
4144  Ops, "vtbx3");
4145  case NEON::BI__builtin_neon_vtbx4_v:
4146  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
4147  Ops, "vtbx4");
4148  }
4149 }
4150 
4151 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
4152  const CallExpr *E,
4153  SmallVectorImpl<Value *> &Ops) {
4154  unsigned int Int = 0;
4155  const char *s = nullptr;
4156 
4157  switch (BuiltinID) {
4158  default:
4159  return nullptr;
4160  case NEON::BI__builtin_neon_vtbl1_v:
4161  case NEON::BI__builtin_neon_vqtbl1_v:
4162  case NEON::BI__builtin_neon_vqtbl1q_v:
4163  case NEON::BI__builtin_neon_vtbl2_v:
4164  case NEON::BI__builtin_neon_vqtbl2_v:
4165  case NEON::BI__builtin_neon_vqtbl2q_v:
4166  case NEON::BI__builtin_neon_vtbl3_v:
4167  case NEON::BI__builtin_neon_vqtbl3_v:
4168  case NEON::BI__builtin_neon_vqtbl3q_v:
4169  case NEON::BI__builtin_neon_vtbl4_v:
4170  case NEON::BI__builtin_neon_vqtbl4_v:
4171  case NEON::BI__builtin_neon_vqtbl4q_v:
4172  break;
4173  case NEON::BI__builtin_neon_vtbx1_v:
4174  case NEON::BI__builtin_neon_vqtbx1_v:
4175  case NEON::BI__builtin_neon_vqtbx1q_v:
4176  case NEON::BI__builtin_neon_vtbx2_v:
4177  case NEON::BI__builtin_neon_vqtbx2_v:
4178  case NEON::BI__builtin_neon_vqtbx2q_v:
4179  case NEON::BI__builtin_neon_vtbx3_v:
4180  case NEON::BI__builtin_neon_vqtbx3_v:
4181  case NEON::BI__builtin_neon_vqtbx3q_v:
4182  case NEON::BI__builtin_neon_vtbx4_v:
4183  case NEON::BI__builtin_neon_vqtbx4_v:
4184  case NEON::BI__builtin_neon_vqtbx4q_v:
4185  break;
4186  }
4187 
4188  assert(E->getNumArgs() >= 3);
4189 
4190  // Get the last argument, which specifies the vector type.
4191  llvm::APSInt Result;
4192  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4193  if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
4194  return nullptr;
4195 
4196  // Determine the type of this overloaded NEON intrinsic.
4197  NeonTypeFlags Type(Result.getZExtValue());
4198  llvm::VectorType *Ty = GetNeonType(&CGF, Type);
4199  if (!Ty)
4200  return nullptr;
4201 
4202  CodeGen::CGBuilderTy &Builder = CGF.Builder;
4203 
4204  // AArch64 scalar builtins are not overloaded; they do not have an extra
4205  // argument that specifies the vector type, so we need to handle each case.
4206  switch (BuiltinID) {
4207  case NEON::BI__builtin_neon_vtbl1_v: {
4208  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
4209  Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
4210  "vtbl1");
4211  }
4212  case NEON::BI__builtin_neon_vtbl2_v: {
4213  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
4214  Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
4215  "vtbl1");
4216  }
4217  case NEON::BI__builtin_neon_vtbl3_v: {
4218  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
4219  Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
4220  "vtbl2");
4221  }
4222  case NEON::BI__builtin_neon_vtbl4_v: {
4223  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
4224  Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
4225  "vtbl2");
4226  }
4227  case NEON::BI__builtin_neon_vtbx1_v: {
4228  Value *TblRes =
4229  packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
4230  Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
4231 
4232  llvm::Constant *EightV = ConstantInt::get(Ty, 8);
4233  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
4234  CmpRes = Builder.CreateSExt(CmpRes, Ty);
4235 
4236  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4237  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4238  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4239  }
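  // The tbl1 instruction returns 0 for any index >= 8, but vtbx1 must
  // leave the destination element unchanged in that case. The mask built
  // above performs a per-element select, roughly:
  //   res[i] = (idx[i] >= 8) ? a[i] : tbl1(b, idx)[i]
  // with the compare result sign-extended to all bits so that plain
  // and/or can act as the select.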
4240  case NEON::BI__builtin_neon_vtbx2_v: {
4241  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
4242  Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
4243  "vtbx1");
4244  }
4245  case NEON::BI__builtin_neon_vtbx3_v: {
4246  Value *TblRes =
4247  packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
4248  Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
4249 
4250  llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
4251  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
4252  TwentyFourV);
4253  CmpRes = Builder.CreateSExt(CmpRes, Ty);
4254 
4255  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4256  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4257  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4258  }
4259  case NEON::BI__builtin_neon_vtbx4_v: {
4260  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
4261  Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
4262  "vtbx2");
4263  }
4264  case NEON::BI__builtin_neon_vqtbl1_v:
4265  case NEON::BI__builtin_neon_vqtbl1q_v:
4266  Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
4267  case NEON::BI__builtin_neon_vqtbl2_v:
4268  case NEON::BI__builtin_neon_vqtbl2q_v:
4269  Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
4270  case NEON::BI__builtin_neon_vqtbl3_v:
4271  case NEON::BI__builtin_neon_vqtbl3q_v:
4272  Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
4273  case NEON::BI__builtin_neon_vqtbl4_v:
4274  case NEON::BI__builtin_neon_vqtbl4q_v:
4275  Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
4276  case NEON::BI__builtin_neon_vqtbx1_v:
4277  case NEON::BI__builtin_neon_vqtbx1q_v:
4278  Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
4279  case NEON::BI__builtin_neon_vqtbx2_v:
4280  case NEON::BI__builtin_neon_vqtbx2q_v:
4281  Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
4282  case NEON::BI__builtin_neon_vqtbx3_v:
4283  case NEON::BI__builtin_neon_vqtbx3q_v:
4284  Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
4285  case NEON::BI__builtin_neon_vqtbx4_v:
4286  case NEON::BI__builtin_neon_vqtbx4q_v:
4287  Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
4288  }
4290 
4291  if (!Int)
4292  return nullptr;
4293 
4294  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
4295  return CGF.EmitNeonCall(F, Ops, s);
4296 }
4297 
4298 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
4299  llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
4300  Op = Builder.CreateBitCast(Op, Int16Ty);
4301  Value *V = UndefValue::get(VTy);
4302  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4303  Op = Builder.CreateInsertElement(V, Op, CI);
4304  return Op;
4305 }
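// A quick sketch of what vectorWrapScalar16 produces for a scalar i16 %s:
//   %v = insertelement <4 x i16> undef, i16 %s, i64 0
// Wrapping the scalar this way lets the callers below reuse the vector
// sqdmull intrinsic for the scalar vqdmlXl builtins; lane 0 of the
// result is extracted afterwards.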
4306 
4307 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4308  const CallExpr *E) {
4309  unsigned HintID = static_cast<unsigned>(-1);
4310  switch (BuiltinID) {
4311  default: break;
4312  case AArch64::BI__builtin_arm_nop:
4313  HintID = 0;
4314  break;
4315  case AArch64::BI__builtin_arm_yield:
4316  HintID = 1;
4317  break;
4318  case AArch64::BI__builtin_arm_wfe:
4319  HintID = 2;
4320  break;
4321  case AArch64::BI__builtin_arm_wfi:
4322  HintID = 3;
4323  break;
4324  case AArch64::BI__builtin_arm_sev:
4325  HintID = 4;
4326  break;
4327  case AArch64::BI__builtin_arm_sevl:
4328  HintID = 5;
4329  break;
4330  }
4331 
4332  if (HintID != static_cast<unsigned>(-1)) {
4333  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4334  return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4335  }
4336 
4337  if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
4338  Value *Address = EmitScalarExpr(E->getArg(0));
4339  Value *RW = EmitScalarExpr(E->getArg(1));
4340  Value *CacheLevel = EmitScalarExpr(E->getArg(2));
4341  Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
4342  Value *IsData = EmitScalarExpr(E->getArg(4));
4343 
4344  Value *Locality = nullptr;
4345  if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
4346  // Temporal fetch; convert the cache level to a locality value.
4347  Locality = llvm::ConstantInt::get(Int32Ty,
4348  -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
4349  } else {
4350  // Streaming fetch.
4351  Locality = llvm::ConstantInt::get(Int32Ty, 0);
4352  }
4353 
4354  // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
4355  // PLDL3STRM or PLDL2STRM.
4356  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4357  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4358  }
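  // Illustration of the mapping above (assuming cache levels 0..2 stand
  // for L1..L3): a temporal fetch at level 0 yields locality 3, level 1
  // yields 2, and level 2 yields 1, while any streaming fetch collapses
  // to locality 0:
  //   llvm.prefetch(addr, rw, /*locality*/ 3 - level, is_data)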
4359 
4360  if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
4361  assert((getContext().getTypeSize(E->getType()) == 32) &&
4362  "rbit of unusual size!");
4363  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4364  return Builder.CreateCall(
4365  CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4366  }
4367  if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
4368  assert((getContext().getTypeSize(E->getType()) == 64) &&
4369  "rbit of unusual size!");
4370  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4371  return Builder.CreateCall(
4372  CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4373  }
4374 
4375  if (BuiltinID == AArch64::BI__clear_cache) {
4376  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4377  const FunctionDecl *FD = E->getDirectCallee();
4378  Value *Ops[2];
4379  for (unsigned i = 0; i < 2; i++)
4380  Ops[i] = EmitScalarExpr(E->getArg(i));
4381  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4382  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4383  StringRef Name = FD->getName();
4384  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4385  }
4386 
4387  if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4388  BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
4389  getContext().getTypeSize(E->getType()) == 128) {
4390  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4391  ? Intrinsic::aarch64_ldaxp
4392  : Intrinsic::aarch64_ldxp);
4393 
4394  Value *LdPtr = EmitScalarExpr(E->getArg(0));
4395  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4396  "ldxp");
4397 
4398  Value *Val0 = Builder.CreateExtractValue(Val, 1);
4399  Value *Val1 = Builder.CreateExtractValue(Val, 0);
4400  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4401  Val0 = Builder.CreateZExt(Val0, Int128Ty);
4402  Val1 = Builder.CreateZExt(Val1, Int128Ty);
4403 
4404  Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4405  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4406  Val = Builder.CreateOr(Val, Val1);
4407  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4408  } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4409  BuiltinID == AArch64::BI__builtin_arm_ldaex) {
4410  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4411 
4412  QualType Ty = E->getType();
4413  llvm::Type *RealResTy = ConvertType(Ty);
4414  llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4415  getContext().getTypeSize(Ty));
4416  LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4417 
4418  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4419  ? Intrinsic::aarch64_ldaxr
4420  : Intrinsic::aarch64_ldxr,
4421  LoadAddr->getType());
4422  Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4423 
4424  if (RealResTy->isPointerTy())
4425  return Builder.CreateIntToPtr(Val, RealResTy);
4426 
4427  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4428  return Builder.CreateBitCast(Val, RealResTy);
4429  }
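  // Sketch of the 128-bit path above: ldxp/ldaxp yields two i64 halves
  // that are reassembled into an i128, roughly:
  //   %pair = call { i64, i64 } @llvm.aarch64.ldxp(i8* %addr)
  //   %hi   = extractvalue { i64, i64 } %pair, 1
  //   %lo   = extractvalue { i64, i64 } %pair, 0
  //   %res  = (zext %hi to i128) << 64 | (zext %lo to i128)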
4430 
4431  if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
4432  BuiltinID == AArch64::BI__builtin_arm_stlex) &&
4433  getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4434  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4435  ? Intrinsic::aarch64_stlxp
4436  : Intrinsic::aarch64_stxp);
4437  llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
4438 
4439  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4440  EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4441 
4442  Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4443  llvm::Value *Val = Builder.CreateLoad(Tmp);
4444 
4445  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4446  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4447  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
4448  Int8PtrTy);
4449  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4450  }
4451 
4452  if (BuiltinID == AArch64::BI__builtin_arm_strex ||
4453  BuiltinID == AArch64::BI__builtin_arm_stlex) {
4454  Value *StoreVal = EmitScalarExpr(E->getArg(0));
4455  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4456 
4457  QualType Ty = E->getArg(0)->getType();
4458  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4459  getContext().getTypeSize(Ty));
4460  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4461 
4462  if (StoreVal->getType()->isPointerTy())
4463  StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4464  else {
4465  StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4466  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4467  }
4468 
4469  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4470  ? Intrinsic::aarch64_stlxr
4471  : Intrinsic::aarch64_stxr,
4472  StoreAddr->getType());
4473  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4474  }
4475 
4476  if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
4477  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4478  return Builder.CreateCall(F);
4479  }
4480 
4481  if (BuiltinID == AArch64::BI__builtin_thread_pointer) {
4482  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_thread_pointer);
4483  return Builder.CreateCall(F);
4484  }
4485 
4486  // CRC32
4487  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4488  switch (BuiltinID) {
4489  case AArch64::BI__builtin_arm_crc32b:
4490  CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4491  case AArch64::BI__builtin_arm_crc32cb:
4492  CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4493  case AArch64::BI__builtin_arm_crc32h:
4494  CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4495  case AArch64::BI__builtin_arm_crc32ch:
4496  CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4497  case AArch64::BI__builtin_arm_crc32w:
4498  CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4499  case AArch64::BI__builtin_arm_crc32cw:
4500  CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4501  case AArch64::BI__builtin_arm_crc32d:
4502  CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4503  case AArch64::BI__builtin_arm_crc32cd:
4504  CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4505  }
4506 
4507  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4508  Value *Arg0 = EmitScalarExpr(E->getArg(0));
4509  Value *Arg1 = EmitScalarExpr(E->getArg(1));
4510  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4511 
4512  llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4513  Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4514 
4515  return Builder.CreateCall(F, {Arg0, Arg1});
4516  }
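  // Example of the zext above: __builtin_arm_crc32b takes an i8 data
  // argument, but llvm.aarch64.crc32b expects i32, so the byte is
  // zero-extended first, roughly:
  //   %d = zext i8 %data to i32
  //   %r = call i32 @llvm.aarch64.crc32b(i32 %crc, i32 %d)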
4517 
4518  if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
4519  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4520  BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4521  BuiltinID == AArch64::BI__builtin_arm_wsr ||
4522  BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
4523  BuiltinID == AArch64::BI__builtin_arm_wsrp) {
4524 
4525  bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
4526  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4527  BuiltinID == AArch64::BI__builtin_arm_rsrp;
4528 
4529  bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4530  BuiltinID == AArch64::BI__builtin_arm_wsrp;
4531 
4532  bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
4533  BuiltinID != AArch64::BI__builtin_arm_wsr;
4534 
4535  llvm::Type *ValueType;
4536  llvm::Type *RegisterType = Int64Ty;
4537  if (IsPointerBuiltin) {
4538  ValueType = VoidPtrTy;
4539  } else if (Is64Bit) {
4540  ValueType = Int64Ty;
4541  } else {
4542  ValueType = Int32Ty;
4543  }
4544 
4545  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4546  }
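  // Shape of the mapping above: the AArch64 system-register intrinsics
  // always transfer a full i64 register, and only the value type seen by
  // the caller varies, e.g. (register names elided):
  //   __builtin_arm_rsr(...)   -> i32, truncated from the i64 read
  //   __builtin_arm_rsr64(...) -> i64, used directly
  //   __builtin_arm_rsrp(...)  -> void*, via inttoptr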
4547 
4548  // Find out if any arguments are required to be integer constant
4549  // expressions.
4550  unsigned ICEArguments = 0;
4551  ASTContext::GetBuiltinTypeError Error;
4552  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4553  assert(Error == ASTContext::GE_None && "Should not codegen an error");
4554 
4555  llvm::SmallVector<Value*, 4> Ops;
4556  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
4557  if ((ICEArguments & (1 << i)) == 0) {
4558  Ops.push_back(EmitScalarExpr(E->getArg(i)));
4559  } else {
4560  // If this is required to be a constant, constant fold it so that we know
4561  // that the generated intrinsic gets a ConstantInt.
4562  llvm::APSInt Result;
4563  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4564  assert(IsConst && "Constant arg isn't actually constant?");
4565  (void)IsConst;
4566  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4567  }
4568  }
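  // The lane and shift-amount operands of many of the builtins handled
  // below are required to be integer constant expressions; folding them
  // to ConstantInt here is what makes the later cast<ConstantInt>(Ops[N])
  // uses in this function safe.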
4569 
4570  auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
4571  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4572  SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
4573 
4574  if (Builtin) {
4575  Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
4576  Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
4577  assert(Result && "SISD intrinsic should have been handled");
4578  return Result;
4579  }
4580 
4581  llvm::APSInt Result;
4582  const Expr *Arg = E->getArg(E->getNumArgs()-1);
4583  NeonTypeFlags Type(0);
4584  if (Arg->isIntegerConstantExpr(Result, getContext()))
4585  // Determine the type of this overloaded NEON intrinsic.
4586  Type = NeonTypeFlags(Result.getZExtValue());
4587 
4588  bool usgn = Type.isUnsigned();
4589  bool quad = Type.isQuad();
4590 
4591  // Handle non-overloaded intrinsics first.
4592  switch (BuiltinID) {
4593  default: break;
4594  case NEON::BI__builtin_neon_vldrq_p128: {
4595  llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
4596  Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
4597  return Builder.CreateDefaultAlignedLoad(Ptr);
4598  }
4599  case NEON::BI__builtin_neon_vstrq_p128: {
4600  llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
4601  Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
4602  return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
4603  }
4604  case NEON::BI__builtin_neon_vcvts_u32_f32:
4605  case NEON::BI__builtin_neon_vcvtd_u64_f64:
4606  usgn = true;
4607  // FALL THROUGH
4608  case NEON::BI__builtin_neon_vcvts_s32_f32:
4609  case NEON::BI__builtin_neon_vcvtd_s64_f64: {
4610  Ops.push_back(EmitScalarExpr(E->getArg(0)));
4611  bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
4612  llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
4613  llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
4614  Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
4615  if (usgn)
4616  return Builder.CreateFPToUI(Ops[0], InTy);
4617  return Builder.CreateFPToSI(Ops[0], InTy);
4618  }
4619  case NEON::BI__builtin_neon_vcvts_f32_u32:
4620  case NEON::BI__builtin_neon_vcvtd_f64_u64:
4621  usgn = true;
4622  // FALL THROUGH
4623  case NEON::BI__builtin_neon_vcvts_f32_s32:
4624  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
4625  Ops.push_back(EmitScalarExpr(E->getArg(0)));
4626  bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
4627  llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
4628  llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
4629  Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
4630  if (usgn)
4631  return Builder.CreateUIToFP(Ops[0], FTy);
4632  return Builder.CreateSIToFP(Ops[0], FTy);
4633  }
4634  case NEON::BI__builtin_neon_vpaddd_s64: {
4635  llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
4636  Value *Vec = EmitScalarExpr(E->getArg(0));
4637  // The vector is v2i64, so make sure it's bitcast to that.
4638  Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
4639  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4640  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4641  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4642  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4643  // Pairwise addition of a v2i64 into a scalar i64.
4644  return Builder.CreateAdd(Op0, Op1, "vpaddd");
4645  }
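  // So vpaddd_s64 reduces to two lane extracts and a scalar add, roughly:
  //   %lo = extractelement <2 x i64> %vec, i64 0
  //   %hi = extractelement <2 x i64> %vec, i64 1
  //   %r  = add i64 %lo, %hi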
4646  case NEON::BI__builtin_neon_vpaddd_f64: {
4647  llvm::Type *Ty =
4648  llvm::VectorType::get(DoubleTy, 2);
4649  Value *Vec = EmitScalarExpr(E->getArg(0));
4650  // The vector is v2f64, so make sure it's bitcast to that.
4651  Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
4652  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4653  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4654  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4655  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4656  // Pairwise addition of a v2f64 into a scalar f64.
4657  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
4658  }
4659  case NEON::BI__builtin_neon_vpadds_f32: {
4660  llvm::Type *Ty =
4661  llvm::VectorType::get(FloatTy, 2);
4662  Value *Vec = EmitScalarExpr(E->getArg(0));
4663  // The vector is v2f32, so make sure it's bitcast to that.
4664  Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
4665  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4666  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4667  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4668  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4669  // Pairwise addition of a v2f32 into a scalar f32.
4670  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
4671  }
4672  case NEON::BI__builtin_neon_vceqzd_s64:
4673  case NEON::BI__builtin_neon_vceqzd_f64:
4674  case NEON::BI__builtin_neon_vceqzs_f32:
4675  Ops.push_back(EmitScalarExpr(E->getArg(0)));
4676  return EmitAArch64CompareBuiltinExpr(
4677  Ops[0], ConvertType(E->getCallReturnType(getContext())),
4678  ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
4679  case NEON::BI__builtin_neon_vcgezd_s64:
4680  case NEON::BI__builtin_neon_vcgezd_f64:
4681  case NEON::BI__builtin_neon_vcgezs_f32:
4682  Ops.push_back(EmitScalarExpr(E->getArg(0)));
4683  return EmitAArch64CompareBuiltinExpr(
4684  Ops[0], ConvertType(E->getCallReturnType(getContext())),
4685  ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
4686  case NEON::BI__builtin_neon_vclezd_s64:
4687  case NEON::BI__builtin_neon_vclezd_f64:
4688  case NEON::BI__builtin_neon_vclezs_f32:
4689  Ops.push_back(EmitScalarExpr(E->getArg(0)));
4690  return EmitAArch64CompareBuiltinExpr(
4691  Ops[0], ConvertType(E->getCallReturnType(getContext())),
4692  ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
4693  case NEON::BI__builtin_neon_vcgtzd_s64:
4694  case NEON::BI__builtin_neon_vcgtzd_f64:
4695  case NEON::BI__builtin_neon_vcgtzs_f32:
4696  Ops.push_back(EmitScalarExpr(E->getArg(0)));
4697  return EmitAArch64CompareBuiltinExpr(
4698  Ops[0], ConvertType(E->getCallReturnType(getContext())),
4699  ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
4700  case NEON::BI__builtin_neon_vcltzd_s64:
4701  case NEON::BI__builtin_neon_vcltzd_f64:
4702  case NEON::BI__builtin_neon_vcltzs_f32:
4703  Ops.push_back(EmitScalarExpr(E->getArg(0)));
4704  return EmitAArch64CompareBuiltinExpr(
4705  Ops[0], ConvertType(E->getCallReturnType(getContext())),
4706  ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
4707 
4708  case NEON::BI__builtin_neon_vceqzd_u64: {
4709  Ops.push_back(EmitScalarExpr(E->getArg(0)));
4710  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4711  Ops[0] =
4712  Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
4713  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
4714  }
4715  case NEON::BI__builtin_neon_vceqd_f64:
4716  case NEON::BI__builtin_neon_vcled_f64:
4717  case NEON::BI__builtin_neon_vcltd_f64:
4718  case NEON::BI__builtin_neon_vcged_f64:
4719  case NEON::BI__builtin_neon_vcgtd_f64: {
4720  llvm::CmpInst::Predicate P;
4721  switch (BuiltinID) {
4722  default: llvm_unreachable("missing builtin ID in switch!");
4723  case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
4724  case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
4725  case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
4726  case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
4727  case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
4728  }
4729  Ops.push_back(EmitScalarExpr(E->getArg(1)));
4730  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
4731  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
4732  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
4733  return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
4734  }
4735  case NEON::BI__builtin_neon_vceqs_f32:
4736  case NEON::BI__builtin_neon_vcles_f32:
4737  case NEON::BI__builtin_neon_vclts_f32:
4738  case NEON::BI__builtin_neon_vcges_f32:
4739  case NEON::BI__builtin_neon_vcgts_f32: {
4740  llvm::CmpInst::Predicate P;
4741  switch (BuiltinID) {
4742  default: llvm_unreachable("missing builtin ID in switch!");
4743  case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
4744  case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
4745  case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
4746  case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
4747  case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
4748  }
4749  Ops.push_back(EmitScalarExpr(E->getArg(1)));
4750  Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
4751  Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
4752  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
4753  return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
4754  }
4755  case NEON::BI__builtin_neon_vceqd_s64:
4756  case NEON::BI__builtin_neon_vceqd_u64:
4757  case NEON::BI__builtin_neon_vcgtd_s64:
4758  case NEON::BI__builtin_neon_vcgtd_u64:
4759  case NEON::BI__builtin_neon_vcltd_s64:
4760  case NEON::BI__builtin_neon_vcltd_u64:
4761  case NEON::BI__builtin_neon_vcged_u64:
4762  case NEON::BI__builtin_neon_vcged_s64:
4763  case NEON::BI__builtin_neon_vcled_u64:
4764  case NEON::BI__builtin_neon_vcled_s64: {
4765  llvm::CmpInst::Predicate P;
4766  switch (BuiltinID) {
4767  default: llvm_unreachable("missing builtin ID in switch!");
4768  case NEON::BI__builtin_neon_vceqd_s64:
4769  case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
4770  case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
4771  case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
4772  case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
4773  case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
4774  case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
4775  case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
4776  case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
4777  case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
4778  }
4779  Ops.push_back(EmitScalarExpr(E->getArg(1)));
4780  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4781  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4782  Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
4783  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
4784  }
4785  case NEON::BI__builtin_neon_vtstd_s64:
4786  case NEON::BI__builtin_neon_vtstd_u64: {
4787  Ops.push_back(EmitScalarExpr(E->getArg(1)));
4788  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
4789  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4790  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4791  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4792  llvm::Constant::getNullValue(Int64Ty));
4793  return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
4794  }
4795  case NEON::BI__builtin_neon_vset_lane_i8:
4796  case NEON::BI__builtin_neon_vset_lane_i16:
4797  case NEON::BI__builtin_neon_vset_lane_i32:
4798  case NEON::BI__builtin_neon_vset_lane_i64:
4799  case NEON::BI__builtin_neon_vset_lane_f32:
4800  case NEON::BI__builtin_neon_vsetq_lane_i8:
4801  case NEON::BI__builtin_neon_vsetq_lane_i16:
4802  case NEON::BI__builtin_neon_vsetq_lane_i32:
4803  case NEON::BI__builtin_neon_vsetq_lane_i64:
4804  case NEON::BI__builtin_neon_vsetq_lane_f32:
4805  Ops.push_back(EmitScalarExpr(E->getArg(2)));
4806  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4807  case NEON::BI__builtin_neon_vset_lane_f64:
4808  // The vector type needs a cast for the v1f64 variant.
4809  Ops[1] = Builder.CreateBitCast(Ops[1],
4810  llvm::VectorType::get(DoubleTy, 1));
4811  Ops.push_back(EmitScalarExpr(E->getArg(2)));
4812  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4813  case NEON::BI__builtin_neon_vsetq_lane_f64:
4814  // The vector type needs a cast for the v2f64 variant.
4815  Ops[1] = Builder.CreateBitCast(Ops[1],
4816  llvm::VectorType::get(DoubleTy, 2));
4817  Ops.push_back(EmitScalarExpr(E->getArg(2)));
4818  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4819 
4820  case NEON::BI__builtin_neon_vget_lane_i8:
4821  case NEON::BI__builtin_neon_vdupb_lane_i8:
4822  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
4823  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4824  "vget_lane");
4825  case NEON::BI__builtin_neon_vgetq_lane_i8:
4826  case NEON::BI__builtin_neon_vdupb_laneq_i8:
4827  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
4828  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4829  "vgetq_lane");
4830  case NEON::BI__builtin_neon_vget_lane_i16:
4831  case NEON::BI__builtin_neon_vduph_lane_i16:
4832  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
4833  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4834  "vget_lane");
4835  case NEON::BI__builtin_neon_vgetq_lane_i16:
4836  case NEON::BI__builtin_neon_vduph_laneq_i16:
4837  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
4838  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4839  "vgetq_lane");
4840  case NEON::BI__builtin_neon_vget_lane_i32:
4841  case NEON::BI__builtin_neon_vdups_lane_i32:
4842  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
4843  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4844  "vget_lane");
4845  case NEON::BI__builtin_neon_vdups_lane_f32:
4846  Ops[0] = Builder.CreateBitCast(Ops[0],
4847  llvm::VectorType::get(FloatTy, 2));
4848  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4849  "vdups_lane");
4850  case NEON::BI__builtin_neon_vgetq_lane_i32:
4851  case NEON::BI__builtin_neon_vdups_laneq_i32:
4852  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
4853  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4854  "vgetq_lane");
4855  case NEON::BI__builtin_neon_vget_lane_i64:
4856  case NEON::BI__builtin_neon_vdupd_lane_i64:
4857  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
4858  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4859  "vget_lane");
4860  case NEON::BI__builtin_neon_vdupd_lane_f64:
4861  Ops[0] = Builder.CreateBitCast(Ops[0],
4862  llvm::VectorType::get(DoubleTy, 1));
4863  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4864  "vdupd_lane");
4865  case NEON::BI__builtin_neon_vgetq_lane_i64:
4866  case NEON::BI__builtin_neon_vdupd_laneq_i64:
4867  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
4868  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4869  "vgetq_lane");
4870  case NEON::BI__builtin_neon_vget_lane_f32:
4871  Ops[0] = Builder.CreateBitCast(Ops[0],
4872  llvm::VectorType::get(FloatTy, 2));
4873  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4874  "vget_lane");
4875  case NEON::BI__builtin_neon_vget_lane_f64:
4876  Ops[0] = Builder.CreateBitCast(Ops[0],
4877  llvm::VectorType::get(DoubleTy, 1));
4878  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4879  "vget_lane");
4880  case NEON::BI__builtin_neon_vgetq_lane_f32:
4881  case NEON::BI__builtin_neon_vdups_laneq_f32:
4882  Ops[0] = Builder.CreateBitCast(Ops[0],
4883  llvm::VectorType::get(FloatTy, 4));
4884  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4885  "vgetq_lane");
4886  case NEON::BI__builtin_neon_vgetq_lane_f64:
4887  case NEON::BI__builtin_neon_vdupd_laneq_f64:
4888  Ops[0] = Builder.CreateBitCast(Ops[0],
4889  llvm::VectorType::get(DoubleTy, 2));
4890  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
4891  "vgetq_lane");
4892  case NEON::BI__builtin_neon_vaddd_s64:
4893  case NEON::BI__builtin_neon_vaddd_u64:
4894  return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
4895  case NEON::BI__builtin_neon_vsubd_s64:
4896  case NEON::BI__builtin_neon_vsubd_u64:
4897  return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
4898  case NEON::BI__builtin_neon_vqdmlalh_s16:
4899  case NEON::BI__builtin_neon_vqdmlslh_s16: {
4900  SmallVector<Value *, 2> ProductOps;
4901  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
4902  ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
4903  llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
4904  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
4905  ProductOps, "vqdmlXl");
4906  Constant *CI = ConstantInt::get(SizeTy, 0);
4907  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
4908 
4909  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
4910  ? Intrinsic::aarch64_neon_sqadd
4911  : Intrinsic::aarch64_neon_sqsub;
4912  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
4913  }
4914  case NEON::BI__builtin_neon_vqshlud_n_s64: {
4915  Ops.push_back(EmitScalarExpr(E->getArg(1)));
4916  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
4917  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
4918  Ops, "vqshlu_n");
4919  }
4920  case NEON::BI__builtin_neon_vqshld_n_u64:
4921  case NEON::BI__builtin_neon_vqshld_n_s64: {
4922  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
4923  ? Intrinsic::aarch64_neon_uqshl
4924  : Intrinsic::aarch64_neon_sqshl;
4925  Ops.push_back(EmitScalarExpr(E->getArg(1)));
4926  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
4927  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
4928  }
4929  case NEON::BI__builtin_neon_vrshrd_n_u64:
4930  case NEON::BI__builtin_neon_vrshrd_n_s64: {
4931  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
4932  ? Intrinsic::aarch64_neon_urshl
4933  : Intrinsic::aarch64_neon_srshl;
4934  Ops.push_back(EmitScalarExpr(E->getArg(1)));
4935  int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
4936  Ops[1] = ConstantInt::get(Int64Ty, -SV);
4937  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
4938  }
4939  case NEON::BI__builtin_neon_vrsrad_n_u64:
4940  case NEON::BI__builtin_neon_vrsrad_n_s64: {
4941  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
4942  ? Intrinsic::aarch64_neon_urshl
4943  : Intrinsic::aarch64_neon_srshl;
4944  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
4945  Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
4946  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
4947  {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
4948  return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
4949  }
4950  case NEON::BI__builtin_neon_vshld_n_s64:
4951  case NEON::BI__builtin_neon_vshld_n_u64: {
4952  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
4953  return Builder.CreateShl(
4954  Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
4955  }
4956  case NEON::BI__builtin_neon_vshrd_n_s64: {
4957  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
4958  return Builder.CreateAShr(
4959  Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
4960  Amt->getZExtValue())),
4961  "shrd_n");
4962  }
4963  case NEON::BI__builtin_neon_vshrd_n_u64: {
4964  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
4965  uint64_t ShiftAmt = Amt->getZExtValue();
4966  // Right-shifting an unsigned value by its size yields 0.
4967  if (ShiftAmt == 64)
4968  return ConstantInt::get(Int64Ty, 0);
4969  return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
4970  "shrd_n");
4971  }
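  // The clamping above matters because LLVM shifts by an amount >= the
  // bit width are undefined, while the AArch64 builtins allow a shift of
  // 64. For the signed case an ashr by 63 already replicates the sign bit
  // into every position, so clamping is lossless; for the unsigned case
  // the result is simply the constant 0.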
4972  case NEON::BI__builtin_neon_vsrad_n_s64: {
4973  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
4974  Ops[1] = Builder.CreateAShr(
4975  Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
4976  Amt->getZExtValue())),
4977  "shrd_n");
4978  return Builder.CreateAdd(Ops[0], Ops[1]);
4979  }
4980  case NEON::BI__builtin_neon_vsrad_n_u64: {
4981  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
4982  uint64_t ShiftAmt = Amt->getZExtValue();
4983  // Right-shifting an unsigned value by its size yields 0.
4984  // As Op + 0 = Op, return Ops[0] directly.
4985  if (ShiftAmt == 64)
4986  return Ops[0];
4987  Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
4988  "shrd_n");
4989  return Builder.CreateAdd(Ops[0], Ops[1]);
4990  }
4991  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
4992  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
4993  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
4994  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
4995  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
4996  "lane");
4997  SmallVector<Value *, 2> ProductOps;
4998  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
4999  ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5000  llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5001  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5002  ProductOps, "vqdmlXl");
5003  Constant *CI = ConstantInt::get(SizeTy, 0);
5004  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5005  Ops.pop_back();
5006 
5007  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5008  BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5009  ? Intrinsic::aarch64_neon_sqadd
5010  : Intrinsic::aarch64_neon_sqsub;
5011  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5012  }
5013  case NEON::BI__builtin_neon_vqdmlals_s32:
5014  case NEON::BI__builtin_neon_vqdmlsls_s32: {
5015  SmallVector<Value *, 2> ProductOps;
5016  ProductOps.push_back(Ops[1]);
5017  ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5018  Ops[1] =
5019  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5020  ProductOps, "vqdmlXl");
5021 
5022  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5023  ? Intrinsic::aarch64_neon_sqadd
5024  : Intrinsic::aarch64_neon_sqsub;
5025  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5026  }
5027  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5028  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5029  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5030  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5031  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5032  "lane");
5033  SmallVector<Value *, 2> ProductOps;
5034  ProductOps.push_back(Ops[1]);
5035  ProductOps.push_back(Ops[2]);
5036  Ops[1] =
5037  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5038  ProductOps, "vqdmlXl");
5039  Ops.pop_back();
5040 
5041  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5042  BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5043  ? Intrinsic::aarch64_neon_sqadd
5044  : Intrinsic::aarch64_neon_sqsub;
5045  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5046  }
5047  }
5048 
5049  llvm::VectorType *VTy = GetNeonType(this, Type);
5050  llvm::Type *Ty = VTy;
5051  if (!Ty)
5052  return nullptr;
5053 
5054  // Not all intrinsics handled by the common case work for AArch64 yet, so only
5055  // defer to common code if it's been added to our special map.
5056  Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5057  AArch64SIMDIntrinsicsProvenSorted);
5058 
5059  if (Builtin)
5060  return EmitCommonNeonBuiltinExpr(
5061  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5062  Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5063  /*never use addresses*/ Address::invalid(), Address::invalid());
5064 
5065  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
5066  return V;
5067 
5068  unsigned Int;
5069  switch (BuiltinID) {
5070  default: return nullptr;
5071  case NEON::BI__builtin_neon_vbsl_v:
5072  case NEON::BI__builtin_neon_vbslq_v: {
5073  llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
5074  Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
5075  Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
5076  Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
5077 
5078  Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
5079  Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
5080  Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
5081  return Builder.CreateBitCast(Ops[0], Ty);
5082  }
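  // vbsl is therefore a plain bitwise select in the integer domain:
  //   res = (mask & a) | (~mask & b)
  // The bitcasts to BitTy let the same and/or sequence serve the
  // FP-typed variants as well; the result is cast back to the original
  // vector type at the end.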
5083  case NEON::BI__builtin_neon_vfma_lane_v:
5084  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
5085  // The ARM builtins (and instructions) have the addend as the first
5086  // operand, but the 'fma' intrinsics have it last. Swap it around here.
5087  Value *Addend = Ops[0];
5088  Value *Multiplicand = Ops[1];
5089  Value *LaneSource = Ops[2];
5090  Ops[0] = Multiplicand;
5091  Ops[1] = LaneSource;
5092  Ops[2] = Addend;
5093 
5094  // Now adjust things to handle the lane access.
5095  llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
5096  llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
5097  VTy;
5098  llvm::Constant *cst = cast<Constant>(Ops[3]);
5099  Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
5100  Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
5101  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
5102 
5103  Ops.pop_back();
5104  Int = Intrinsic::fma;
5105  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
5106  }
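  // Net effect for vfma_lane: the selected lane is broadcast with a splat
  // shufflevector and the addend is rotated into the last fma operand,
  // i.e. roughly:
  //   fma(multiplicand, splat(lane_src[lane]), addend)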
5107  case NEON::BI__builtin_neon_vfma_laneq_v: {
5108  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
5109  // v1f64 fma should be mapped to Neon scalar f64 fma
5110  if (VTy && VTy->getElementType() == DoubleTy) {
5111  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5112  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5113  llvm::Type *VTy = GetNeonType(this,
5114  NeonTypeFlags(NeonTypeFlags::Float64, false, true));
5115  Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
5116  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5117  Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
5118  Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5119  return Builder.CreateBitCast(Result, Ty);
5120  }
5121  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5122  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5123  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5124 
5125  llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
5126  VTy->getNumElements() * 2);
5127  Ops[2] = Builder.CreateBitCast(Ops[2], STy);
5128  Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
5129  cast<ConstantInt>(Ops[3]));
5130  Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
5131 
5132  return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5133  }
5134  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
5135  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5136  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5137  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5138 
5139  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5140  Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
5141  return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5142  }
5143  case NEON::BI__builtin_neon_vfmas_lane_f32:
5144  case NEON::BI__builtin_neon_vfmas_laneq_f32:
5145  case NEON::BI__builtin_neon_vfmad_lane_f64:
5146  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
5147  Ops.push_back(EmitScalarExpr(E->getArg(3)));
5148  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
5149  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5150  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5151  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5152  }
5153  case NEON::BI__builtin_neon_vfms_v:
5154  case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types
5155  // FIXME: probably remove when we no longer support aarch64_simd.h
5156  // (arm_neon.h delegates to vfma).
5157 
5158  // The ARM builtins (and instructions) have the addend as the first
5159  // operand, but the 'fma' intrinsics have it last. Swap it around here.
5160  Value *Subtrahend = Ops[0];
5161  Value *Multiplicand = Ops[2];
5162  Ops[0] = Multiplicand;
5163  Ops[2] = Subtrahend;
5164  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5165  Ops[1] = Builder.CreateFNeg(Ops[1]);
5166  Int = Intrinsic::fma;
5167  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls");
5168  }
5169  case NEON::BI__builtin_neon_vmull_v:
5170  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5171  Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
5172  if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
5173  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
5174  case NEON::BI__builtin_neon_vmax_v:
5175  case NEON::BI__builtin_neon_vmaxq_v:
5176  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5177  Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
5178  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
5179  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
5180  case NEON::BI__builtin_neon_vmin_v:
5181  case NEON::BI__builtin_neon_vminq_v:
5182  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5183  Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
5184  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
5185  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
5186  case NEON::BI__builtin_neon_vabd_v:
5187  case NEON::BI__builtin_neon_vabdq_v:
5188  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5189  Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
5190  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
5191  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
5192  case NEON::BI__builtin_neon_vpadal_v:
5193  case NEON::BI__builtin_neon_vpadalq_v: {
5194  unsigned ArgElts = VTy->getNumElements();
5195  llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
5196  unsigned BitWidth = EltTy->getBitWidth();
5197  llvm::Type *ArgTy = llvm::VectorType::get(
5198  llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
5199  llvm::Type* Tys[2] = { VTy, ArgTy };
5200  Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
5201  SmallVector<llvm::Value*, 1> TmpOps;
5202  TmpOps.push_back(Ops[1]);
5203  Function *F = CGM.getIntrinsic(Int, Tys);
5204  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
5205  llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
5206  return Builder.CreateAdd(tmp, addend);
5207  }
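  // There is no accumulating form of the pairwise add-long intrinsic, so
  // vpadal is decomposed as shown: a [us]addlp over the narrow argument
  // followed by an ordinary vector add with the accumulator, roughly:
  //   %t = call <N x iW> @llvm.aarch64.neon.saddlp(<2N x iW/2> %b)
  //   %r = add <N x iW> %t, %a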
5208  case NEON::BI__builtin_neon_vpmin_v:
5209  case NEON::BI__builtin_neon_vpminq_v:
5210  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5211  Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
5212  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
5213  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
5214  case NEON::BI__builtin_neon_vpmax_v:
5215  case NEON::BI__builtin_neon_vpmaxq_v:
5216  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5217  Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
5218  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
5219  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
5220  case NEON::BI__builtin_neon_vminnm_v:
5221  case NEON::BI__builtin_neon_vminnmq_v:
5222  Int = Intrinsic::aarch64_neon_fminnm;
5223  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
5224  case NEON::BI__builtin_neon_vmaxnm_v:
5225  case NEON::BI__builtin_neon_vmaxnmq_v:
5226  Int = Intrinsic::aarch64_neon_fmaxnm;
5227  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
5228  case NEON::BI__builtin_neon_vrecpss_f32: {
5229  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5230  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
5231  Ops, "vrecps");
5232  }
5233  case NEON::BI__builtin_neon_vrecpsd_f64: {
5234  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5235  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
5236  Ops, "vrecps");
5237  }
5238  case NEON::BI__builtin_neon_vqshrun_n_v:
5239  Int = Intrinsic::aarch64_neon_sqshrun;
5240  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
5241  case NEON::BI__builtin_neon_vqrshrun_n_v:
5242  Int = Intrinsic::aarch64_neon_sqrshrun;
5243  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
5244  case NEON::BI__builtin_neon_vqshrn_n_v:
5245  Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
5246  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
5247  case NEON::BI__builtin_neon_vrshrn_n_v:
5248  Int = Intrinsic::aarch64_neon_rshrn;
5249  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
5250  case NEON::BI__builtin_neon_vqrshrn_n_v:
5251  Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
5252  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
5253  case NEON::BI__builtin_neon_vrnda_v:
5254  case NEON::BI__builtin_neon_vrndaq_v: {
5255  Int = Intrinsic::round;
5256  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
5257  }
5258  case NEON::BI__builtin_neon_vrndi_v:
5259  case NEON::BI__builtin_neon_vrndiq_v: {
5260  Int = Intrinsic::nearbyint;
5261  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
5262  }
5263  case NEON::BI__builtin_neon_vrndm_v:
5264  case NEON::BI__builtin_neon_vrndmq_v: {
5265  Int = Intrinsic::floor;
5266  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
5267  }
5268  case NEON::BI__builtin_neon_vrndn_v:
5269  case NEON::BI__builtin_neon_vrndnq_v: {
5270  Int = Intrinsic::aarch64_neon_frintn;
5271  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
5272  }
5273  case NEON::BI__builtin_neon_vrndp_v:
5274  case NEON::BI__builtin_neon_vrndpq_v: {
5275  Int = Intrinsic::ceil;
5276  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
5277  }
5278  case NEON::BI__builtin_neon_vrndx_v:
5279  case NEON::BI__builtin_neon_vrndxq_v: {
5280  Int = Intrinsic::rint;
5281  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
5282  }
5283  case NEON::BI__builtin_neon_vrnd_v:
5284  case NEON::BI__builtin_neon_vrndq_v: {
5285  Int = Intrinsic::trunc;
5286  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
5287  }
5288  case NEON::BI__builtin_neon_vceqz_v:
5289  case NEON::BI__builtin_neon_vceqzq_v:
5290  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
5291  ICmpInst::ICMP_EQ, "vceqz");
5292  case NEON::BI__builtin_neon_vcgez_v:
5293  case NEON::BI__builtin_neon_vcgezq_v:
5294  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
5295  ICmpInst::ICMP_SGE, "vcgez");
5296  case NEON::BI__builtin_neon_vclez_v:
5297  case NEON::BI__builtin_neon_vclezq_v:
5298  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
5299  ICmpInst::ICMP_SLE, "vclez");
5300  case NEON::BI__builtin_neon_vcgtz_v:
5301  case NEON::BI__builtin_neon_vcgtzq_v:
5302  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
5303  ICmpInst::ICMP_SGT, "vcgtz");
5304  case NEON::BI__builtin_neon_vcltz_v:
5305  case NEON::BI__builtin_neon_vcltzq_v:
5306  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
5307  ICmpInst::ICMP_SLT, "vcltz");
5308  case NEON::BI__builtin_neon_vcvt_f64_v:
5309  case NEON::BI__builtin_neon_vcvtq_f64_v:
5310  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5311  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
5312  return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5313  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5314  case NEON::BI__builtin_neon_vcvt_f64_f32: {
5315  assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
5316  "unexpected vcvt_f64_f32 builtin");
5317  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
5318  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5319 
5320  return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
5321  }
5322  case NEON::BI__builtin_neon_vcvt_f32_f64: {
5323  assert(Type.getEltType() == NeonTypeFlags::Float32 &&
5324  "unexpected vcvt_f32_f64 builtin");
5325  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
5326  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5327 
5328  return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
5329  }
5330  case NEON::BI__builtin_neon_vcvt_s32_v:
5331  case NEON::BI__builtin_neon_vcvt_u32_v:
5332  case NEON::BI__builtin_neon_vcvt_s64_v:
5333  case NEON::BI__builtin_neon_vcvt_u64_v:
5334  case NEON::BI__builtin_neon_vcvtq_s32_v:
5335  case NEON::BI__builtin_neon_vcvtq_u32_v:
5336  case NEON::BI__builtin_neon_vcvtq_s64_v:
5337  case NEON::BI__builtin_neon_vcvtq_u64_v: {
5338  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
5339  if (usgn)
5340  return Builder.CreateFPToUI(Ops[0], Ty);
5341  return Builder.CreateFPToSI(Ops[0], Ty);
5342  }
5343  case NEON::BI__builtin_neon_vcvta_s32_v:
5344  case NEON::BI__builtin_neon_vcvtaq_s32_v:
5345  case NEON::BI__builtin_neon_vcvta_u32_v:
5346  case NEON::BI__builtin_neon_vcvtaq_u32_v:
5347  case NEON::BI__builtin_neon_vcvta_s64_v:
5348  case NEON::BI__builtin_neon_vcvtaq_s64_v:
5349  case NEON::BI__builtin_neon_vcvta_u64_v:
5350  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
5351  Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
5352  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5353  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
5354  }
5355  case NEON::BI__builtin_neon_vcvtm_s32_v:
5356  case NEON::BI__builtin_neon_vcvtmq_s32_v:
5357  case NEON::BI__builtin_neon_vcvtm_u32_v:
5358  case NEON::BI__builtin_neon_vcvtmq_u32_v:
5359  case NEON::BI__builtin_neon_vcvtm_s64_v:
5360  case NEON::BI__builtin_neon_vcvtmq_s64_v:
5361  case NEON::BI__builtin_neon_vcvtm_u64_v:
5362  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
5363  Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
5364  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5365  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
5366  }
5367  case NEON::BI__builtin_neon_vcvtn_s32_v:
5368  case NEON::BI__builtin_neon_vcvtnq_s32_v:
5369  case NEON::BI__builtin_neon_vcvtn_u32_v:
5370  case NEON::BI__builtin_neon_vcvtnq_u32_v:
5371  case NEON::BI__builtin_neon_vcvtn_s64_v:
5372  case NEON::BI__builtin_neon_vcvtnq_s64_v:
5373  case NEON::BI__builtin_neon_vcvtn_u64_v:
5374  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
5375  Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
5376  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5377  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
5378  }
5379  case NEON::BI__builtin_neon_vcvtp_s32_v:
5380  case NEON::BI__builtin_neon_vcvtpq_s32_v:
5381  case NEON::BI__builtin_neon_vcvtp_u32_v:
5382  case NEON::BI__builtin_neon_vcvtpq_u32_v:
5383  case NEON::BI__builtin_neon_vcvtp_s64_v:
5384  case NEON::BI__builtin_neon_vcvtpq_s64_v:
5385  case NEON::BI__builtin_neon_vcvtp_u64_v:
5386  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
5387  Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
5388  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5389  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
5390  }
5391  case NEON::BI__builtin_neon_vmulx_v:
5392  case NEON::BI__builtin_neon_vmulxq_v: {
5393  Int = Intrinsic::aarch64_neon_fmulx;
5394  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
5395  }
5396  case NEON::BI__builtin_neon_vmul_lane_v:
5397  case NEON::BI__builtin_neon_vmul_laneq_v: {
5398  // v1f64 vmul_lane should be mapped to Neon scalar mul lane
5399  bool Quad = false;
5400  if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
5401  Quad = true;
5402  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5403  llvm::Type *VTy = GetNeonType(this,
5404  NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
5405  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5406  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
5407  Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
5408  return Builder.CreateBitCast(Result, Ty);
5409  }
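  // Sketch of the lowering (the lane number is illustrative): for
  // vmul_laneq_f64(a, v, 1) the block above extracts the lane and multiplies
  // as scalars,
  //   %lane = extractelement <2 x double> %v, i32 1
  //   %prod = fmul double %a, %lane
  // before bitcasting the scalar product back to the builtin's return type.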
5410  case NEON::BI__builtin_neon_vnegd_s64:
5411  return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
5412  case NEON::BI__builtin_neon_vpmaxnm_v:
5413  case NEON::BI__builtin_neon_vpmaxnmq_v: {
5414  Int = Intrinsic::aarch64_neon_fmaxnmp;
5415  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
5416  }
5417  case NEON::BI__builtin_neon_vpminnm_v:
5418  case NEON::BI__builtin_neon_vpminnmq_v: {
5419  Int = Intrinsic::aarch64_neon_fminnmp;
5420  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
5421  }
5422  case NEON::BI__builtin_neon_vsqrt_v:
5423  case NEON::BI__builtin_neon_vsqrtq_v: {
5424  Int = Intrinsic::sqrt;
5425  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5426  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
5427  }
5428  case NEON::BI__builtin_neon_vrbit_v:
5429  case NEON::BI__builtin_neon_vrbitq_v: {
5430  Int = Intrinsic::aarch64_neon_rbit;
5431  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
5432  }
5433  case NEON::BI__builtin_neon_vaddv_u8:
5434  // FIXME: These are handled by the AArch64 scalar code.
5435  usgn = true;
5436  // FALLTHROUGH
5437  case NEON::BI__builtin_neon_vaddv_s8: {
5438  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5439  Ty = Int32Ty;
5440  VTy = llvm::VectorType::get(Int8Ty, 8);
5441  llvm::Type *Tys[2] = { Ty, VTy };
5442  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5443  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5444  return Builder.CreateTrunc(Ops[0], Int8Ty);
5445  }
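  // Approximate shape of the emitted IR: vaddv_u8 reduces across the vector
  // with an intrinsic returning i32, then truncates to the element width:
  //   %wide = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %x)
  //   %res  = trunc i32 %wide to i8
  // The vaddv/vmaxv/vminv/vaddlv cases below all follow this pattern.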
5446  case NEON::BI__builtin_neon_vaddv_u16:
5447  usgn = true;
5448  // FALLTHROUGH
5449  case NEON::BI__builtin_neon_vaddv_s16: {
5450  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5451  Ty = Int32Ty;
5452  VTy = llvm::VectorType::get(Int16Ty, 4);
5453  llvm::Type *Tys[2] = { Ty, VTy };
5454  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5455  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5456  return Builder.CreateTrunc(Ops[0], Int16Ty);
5457  }
5458  case NEON::BI__builtin_neon_vaddvq_u8:
5459  usgn = true;
5460  // FALLTHROUGH
5461  case NEON::BI__builtin_neon_vaddvq_s8: {
5462  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5463  Ty = Int32Ty;
5464  VTy = llvm::VectorType::get(Int8Ty, 16);
5465  llvm::Type *Tys[2] = { Ty, VTy };
5466  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5467  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5468  return Builder.CreateTrunc(Ops[0], Int8Ty);
5469  }
5470  case NEON::BI__builtin_neon_vaddvq_u16:
5471  usgn = true;
5472  // FALLTHROUGH
5473  case NEON::BI__builtin_neon_vaddvq_s16: {
5474  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5475  Ty = Int32Ty;
5476  VTy = llvm::VectorType::get(Int16Ty, 8);
5477  llvm::Type *Tys[2] = { Ty, VTy };
5478  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5479  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5480  return Builder.CreateTrunc(Ops[0], Int16Ty);
5481  }
5482  case NEON::BI__builtin_neon_vmaxv_u8: {
5483  Int = Intrinsic::aarch64_neon_umaxv;
5484  Ty = Int32Ty;
5485  VTy = llvm::VectorType::get(Int8Ty, 8);
5486  llvm::Type *Tys[2] = { Ty, VTy };
5487  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5488  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5489  return Builder.CreateTrunc(Ops[0], Int8Ty);
5490  }
5491  case NEON::BI__builtin_neon_vmaxv_u16: {
5492  Int = Intrinsic::aarch64_neon_umaxv;
5493  Ty = Int32Ty;
5494  VTy = llvm::VectorType::get(Int16Ty, 4);
5495  llvm::Type *Tys[2] = { Ty, VTy };
5496  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5497  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5498  return Builder.CreateTrunc(Ops[0], Int16Ty);
5499  }
5500  case NEON::BI__builtin_neon_vmaxvq_u8: {
5501  Int = Intrinsic::aarch64_neon_umaxv;
5502  Ty = Int32Ty;
5503  VTy = llvm::VectorType::get(Int8Ty, 16);
5504  llvm::Type *Tys[2] = { Ty, VTy };
5505  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5506  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5507  return Builder.CreateTrunc(Ops[0], Int8Ty);
5508  }
5509  case NEON::BI__builtin_neon_vmaxvq_u16: {
5510  Int = Intrinsic::aarch64_neon_umaxv;
5511  Ty = Int32Ty;
5512  VTy = llvm::VectorType::get(Int16Ty, 8);
5513  llvm::Type *Tys[2] = { Ty, VTy };
5514  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5515  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5516  return Builder.CreateTrunc(Ops[0], Int16Ty);
5517  }
5518  case NEON::BI__builtin_neon_vmaxv_s8: {
5519  Int = Intrinsic::aarch64_neon_smaxv;
5520  Ty = Int32Ty;
5521  VTy = llvm::VectorType::get(Int8Ty, 8);
5522  llvm::Type *Tys[2] = { Ty, VTy };
5523  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5524  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5525  return Builder.CreateTrunc(Ops[0], Int8Ty);
5526  }
5527  case NEON::BI__builtin_neon_vmaxv_s16: {
5528  Int = Intrinsic::aarch64_neon_smaxv;
5529  Ty = Int32Ty;
5530  VTy = llvm::VectorType::get(Int16Ty, 4);
5531  llvm::Type *Tys[2] = { Ty, VTy };
5532  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5533  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5534  return Builder.CreateTrunc(Ops[0], Int16Ty);
5535  }
5536  case NEON::BI__builtin_neon_vmaxvq_s8: {
5537  Int = Intrinsic::aarch64_neon_smaxv;
5538  Ty = Int32Ty;
5539  VTy = llvm::VectorType::get(Int8Ty, 16);
5540  llvm::Type *Tys[2] = { Ty, VTy };
5541  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5542  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5543  return Builder.CreateTrunc(Ops[0], Int8Ty);
5544  }
5545  case NEON::BI__builtin_neon_vmaxvq_s16: {
5546  Int = Intrinsic::aarch64_neon_smaxv;
5547  Ty = Int32Ty;
5548  VTy = llvm::VectorType::get(Int16Ty, 8);
5549  llvm::Type *Tys[2] = { Ty, VTy };
5550  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5551  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5552  return Builder.CreateTrunc(Ops[0], Int16Ty);
5553  }
5554  case NEON::BI__builtin_neon_vminv_u8: {
5555  Int = Intrinsic::aarch64_neon_uminv;
5556  Ty = Int32Ty;
5557  VTy = llvm::VectorType::get(Int8Ty, 8);
5558  llvm::Type *Tys[2] = { Ty, VTy };
5559  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5560  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5561  return Builder.CreateTrunc(Ops[0], Int8Ty);
5562  }
5563  case NEON::BI__builtin_neon_vminv_u16: {
5564  Int = Intrinsic::aarch64_neon_uminv;
5565  Ty = Int32Ty;
5566  VTy = llvm::VectorType::get(Int16Ty, 4);
5567  llvm::Type *Tys[2] = { Ty, VTy };
5568  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5569  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5570  return Builder.CreateTrunc(Ops[0], Int16Ty);
5571  }
5572  case NEON::BI__builtin_neon_vminvq_u8: {
5573  Int = Intrinsic::aarch64_neon_uminv;
5574  Ty = Int32Ty;
5575  VTy = llvm::VectorType::get(Int8Ty, 16);
5576  llvm::Type *Tys[2] = { Ty, VTy };
5577  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5578  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5579  return Builder.CreateTrunc(Ops[0], Int8Ty);
5580  }
5581  case NEON::BI__builtin_neon_vminvq_u16: {
5582  Int = Intrinsic::aarch64_neon_uminv;
5583  Ty = Int32Ty;
5584  VTy = llvm::VectorType::get(Int16Ty, 8);
5585  llvm::Type *Tys[2] = { Ty, VTy };
5586  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5587  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5588  return Builder.CreateTrunc(Ops[0], Int16Ty);
5589  }
5590  case NEON::BI__builtin_neon_vminv_s8: {
5591  Int = Intrinsic::aarch64_neon_sminv;
5592  Ty = Int32Ty;
5593  VTy = llvm::VectorType::get(Int8Ty, 8);
5594  llvm::Type *Tys[2] = { Ty, VTy };
5595  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5596  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5597  return Builder.CreateTrunc(Ops[0], Int8Ty);
5598  }
5599  case NEON::BI__builtin_neon_vminv_s16: {
5600  Int = Intrinsic::aarch64_neon_sminv;
5601  Ty = Int32Ty;
5602  VTy = llvm::VectorType::get(Int16Ty, 4);
5603  llvm::Type *Tys[2] = { Ty, VTy };
5604  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5605  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5606  return Builder.CreateTrunc(Ops[0], Int16Ty);
5607  }
5608  case NEON::BI__builtin_neon_vminvq_s8: {
5609  Int = Intrinsic::aarch64_neon_sminv;
5610  Ty = Int32Ty;
5611  VTy = llvm::VectorType::get(Int8Ty, 16);
5612  llvm::Type *Tys[2] = { Ty, VTy };
5613  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5614  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5615  return Builder.CreateTrunc(Ops[0], Int8Ty);
5616  }
5617  case NEON::BI__builtin_neon_vminvq_s16: {
5618  Int = Intrinsic::aarch64_neon_sminv;
5619  Ty = Int32Ty;
5620  VTy = llvm::VectorType::get(Int16Ty, 8);
5621  llvm::Type *Tys[2] = { Ty, VTy };
5622  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5623  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
5624  return Builder.CreateTrunc(Ops[0], Int16Ty);
5625  }
5626  case NEON::BI__builtin_neon_vmul_n_f64: {
5627  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5628  Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
5629  return Builder.CreateFMul(Ops[0], RHS);
5630  }
5631  case NEON::BI__builtin_neon_vaddlv_u8: {
5632  Int = Intrinsic::aarch64_neon_uaddlv;
5633  Ty = Int32Ty;
5634  VTy = llvm::VectorType::get(Int8Ty, 8);
5635  llvm::Type *Tys[2] = { Ty, VTy };
5636  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5637  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5638  return Builder.CreateTrunc(Ops[0], Int16Ty);
5639  }
5640  case NEON::BI__builtin_neon_vaddlv_u16: {
5641  Int = Intrinsic::aarch64_neon_uaddlv;
5642  Ty = Int32Ty;
5643  VTy = llvm::VectorType::get(Int16Ty, 4);
5644  llvm::Type *Tys[2] = { Ty, VTy };
5645  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5646  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5647  }
5648  case NEON::BI__builtin_neon_vaddlvq_u8: {
5649  Int = Intrinsic::aarch64_neon_uaddlv;
5650  Ty = Int32Ty;
5651  VTy = llvm::VectorType::get(Int8Ty, 16);
5652  llvm::Type *Tys[2] = { Ty, VTy };
5653  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5654  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5655  return Builder.CreateTrunc(Ops[0], Int16Ty);
5656  }
5657  case NEON::BI__builtin_neon_vaddlvq_u16: {
5658  Int = Intrinsic::aarch64_neon_uaddlv;
5659  Ty = Int32Ty;
5660  VTy = llvm::VectorType::get(Int16Ty, 8);
5661  llvm::Type *Tys[2] = { Ty, VTy };
5662  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5663  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5664  }
5665  case NEON::BI__builtin_neon_vaddlv_s8: {
5666  Int = Intrinsic::aarch64_neon_saddlv;
5667  Ty = Int32Ty;
5668  VTy = llvm::VectorType::get(Int8Ty, 8);
5669  llvm::Type *Tys[2] = { Ty, VTy };
5670  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5671  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5672  return Builder.CreateTrunc(Ops[0], Int16Ty);
5673  }
5674  case NEON::BI__builtin_neon_vaddlv_s16: {
5675  Int = Intrinsic::aarch64_neon_saddlv;
5676  Ty = Int32Ty;
5677  VTy = llvm::VectorType::get(Int16Ty, 4);
5678  llvm::Type *Tys[2] = { Ty, VTy };
5679  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5680  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5681  }
5682  case NEON::BI__builtin_neon_vaddlvq_s8: {
5683  Int = Intrinsic::aarch64_neon_saddlv;
5684  Ty = Int32Ty;
5685  VTy = llvm::VectorType::get(Int8Ty, 16);
5686  llvm::Type *Tys[2] = { Ty, VTy };
5687  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5688  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5689  return Builder.CreateTrunc(Ops[0], Int16Ty);
5690  }
5691  case NEON::BI__builtin_neon_vaddlvq_s16: {
5692  Int = Intrinsic::aarch64_neon_saddlv;
5693  Ty = Int32Ty;
5694  VTy = llvm::VectorType::get(Int16Ty, 8);
5695  llvm::Type *Tys[2] = { Ty, VTy };
5696  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5697  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
5698  }
5699  case NEON::BI__builtin_neon_vsri_n_v:
5700  case NEON::BI__builtin_neon_vsriq_n_v: {
5701  Int = Intrinsic::aarch64_neon_vsri;
5702  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
5703  return EmitNeonCall(Intrin, Ops, "vsri_n");
5704  }
5705  case NEON::BI__builtin_neon_vsli_n_v:
5706  case NEON::BI__builtin_neon_vsliq_n_v: {
5707  Int = Intrinsic::aarch64_neon_vsli;
5708  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
5709  return EmitNeonCall(Intrin, Ops, "vsli_n");
5710  }
5711  case NEON::BI__builtin_neon_vsra_n_v:
5712  case NEON::BI__builtin_neon_vsraq_n_v:
5713  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5714  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5715  return Builder.CreateAdd(Ops[0], Ops[1]);
5716  case NEON::BI__builtin_neon_vrsra_n_v:
5717  case NEON::BI__builtin_neon_vrsraq_n_v: {
5718  Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
5719  SmallVector<llvm::Value*, 2> TmpOps;
5720  TmpOps.push_back(Ops[1]);
5721  TmpOps.push_back(Ops[2]);
5722  Function* F = CGM.getIntrinsic(Int, Ty);
5723  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
5724  Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
5725  return Builder.CreateAdd(Ops[0], tmp);
5726  }
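  // Sketch, assuming EmitNeonCall's trailing (1, true) arguments mark
  // operand 1 as a negated (rightward) shift amount: vrsra_n_u32(a, b, 2)
  // then lowers roughly to
  //   %sh  = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %b,
  //                                              <2 x i32> <i32 -2, i32 -2>)
  //   %res = add <2 x i32> %a, %sh
  // i.e. a rounding right shift expressed as a negative left shift, plus the
  // accumulator.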
5727  // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
5728  // of an Align parameter here.
5729  case NEON::BI__builtin_neon_vld1_x2_v:
5730  case NEON::BI__builtin_neon_vld1q_x2_v:
5731  case NEON::BI__builtin_neon_vld1_x3_v:
5732  case NEON::BI__builtin_neon_vld1q_x3_v:
5733  case NEON::BI__builtin_neon_vld1_x4_v:
5734  case NEON::BI__builtin_neon_vld1q_x4_v: {
5735  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5736  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5737  llvm::Type *Tys[2] = { VTy, PTy };
5738  unsigned Int;
5739  switch (BuiltinID) {
5740  case NEON::BI__builtin_neon_vld1_x2_v:
5741  case NEON::BI__builtin_neon_vld1q_x2_v:
5742  Int = Intrinsic::aarch64_neon_ld1x2;
5743  break;
5744  case NEON::BI__builtin_neon_vld1_x3_v:
5745  case NEON::BI__builtin_neon_vld1q_x3_v:
5746  Int = Intrinsic::aarch64_neon_ld1x3;
5747  break;
5748  case NEON::BI__builtin_neon_vld1_x4_v:
5749  case NEON::BI__builtin_neon_vld1q_x4_v:
5750  Int = Intrinsic::aarch64_neon_ld1x4;
5751  break;
5752  }
5753  Function *F = CGM.getIntrinsic(Int, Tys);
5754  Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
5755  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5756  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5757  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5758  }
5759  case NEON::BI__builtin_neon_vst1_x2_v:
5760  case NEON::BI__builtin_neon_vst1q_x2_v:
5761  case NEON::BI__builtin_neon_vst1_x3_v:
5762  case NEON::BI__builtin_neon_vst1q_x3_v:
5763  case NEON::BI__builtin_neon_vst1_x4_v:
5764  case NEON::BI__builtin_neon_vst1q_x4_v: {
5765  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5766  llvm::Type *Tys[2] = { VTy, PTy };
5767  unsigned Int;
5768  switch (BuiltinID) {
5769  case NEON::BI__builtin_neon_vst1_x2_v:
5770  case NEON::BI__builtin_neon_vst1q_x2_v:
5771  Int = Intrinsic::aarch64_neon_st1x2;
5772  break;
5773  case NEON::BI__builtin_neon_vst1_x3_v:
5774  case NEON::BI__builtin_neon_vst1q_x3_v:
5775  Int = Intrinsic::aarch64_neon_st1x3;
5776  break;
5777  case NEON::BI__builtin_neon_vst1_x4_v:
5778  case NEON::BI__builtin_neon_vst1q_x4_v:
5779  Int = Intrinsic::aarch64_neon_st1x4;
5780  break;
5781  }
5782  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
5783  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
5784  }
5785  case NEON::BI__builtin_neon_vld1_v:
5786  case NEON::BI__builtin_neon_vld1q_v:
5787  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
5788  return Builder.CreateDefaultAlignedLoad(Ops[0]);
5789  case NEON::BI__builtin_neon_vst1_v:
5790  case NEON::BI__builtin_neon_vst1q_v:
5791  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
5792  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5793  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5794  case NEON::BI__builtin_neon_vld1_lane_v:
5795  case NEON::BI__builtin_neon_vld1q_lane_v:
5796  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5797  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5798  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5799  Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
5800  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
5801  case NEON::BI__builtin_neon_vld1_dup_v:
5802  case NEON::BI__builtin_neon_vld1q_dup_v: {
5803  Value *V = UndefValue::get(Ty);
5804  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5805  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5806  Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
5807  llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5808  Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
5809  return EmitNeonSplat(Ops[0], CI);
5810  }
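  // Approximate emitted IR for vld1q_dup_f32: load one scalar, insert it at
  // lane 0, and let EmitNeonSplat broadcast it with a zero shuffle mask:
  //   %s = load float, float* %p
  //   %v = insertelement <4 x float> undef, float %s, i32 0
  //   %r = shufflevector <4 x float> %v, <4 x float> undef,
  //                      <4 x i32> zeroinitializer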
5811  case NEON::BI__builtin_neon_vst1_lane_v:
5812  case NEON::BI__builtin_neon_vst1q_lane_v:
5813  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5814  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5815  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5816  return Builder.CreateDefaultAlignedStore(Ops[1],
5817  Builder.CreateBitCast(Ops[0], Ty));
5818  case NEON::BI__builtin_neon_vld2_v:
5819  case NEON::BI__builtin_neon_vld2q_v: {
5820  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5821  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5822  llvm::Type *Tys[2] = { VTy, PTy };
5823  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
5824  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
5825  Ops[0] = Builder.CreateBitCast(Ops[0],
5826  llvm::PointerType::getUnqual(Ops[1]->getType()));
5827  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5828  }
5829  case NEON::BI__builtin_neon_vld3_v:
5830  case NEON::BI__builtin_neon_vld3q_v: {
5831  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5832  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5833  llvm::Type *Tys[2] = { VTy, PTy };
5834  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
5835  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
5836  Ops[0] = Builder.CreateBitCast(Ops[0],
5837  llvm::PointerType::getUnqual(Ops[1]->getType()));
5838  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5839  }
5840  case NEON::BI__builtin_neon_vld4_v:
5841  case NEON::BI__builtin_neon_vld4q_v: {
5842  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
5843  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5844  llvm::Type *Tys[2] = { VTy, PTy };
5845  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
5846  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
5847  Ops[0] = Builder.CreateBitCast(Ops[0],
5848  llvm::PointerType::getUnqual(Ops[1]->getType()));
5849  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5850  }
5851  case NEON::BI__builtin_neon_vld2_dup_v:
5852  case NEON::BI__builtin_neon_vld2q_dup_v: {
5853  llvm::Type *PTy =
5854  llvm::PointerType::getUnqual(VTy->getElementType());
5855  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5856  llvm::Type *Tys[2] = { VTy, PTy };
5857  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
5858  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
5859  Ops[0] = Builder.CreateBitCast(Ops[0],
5860  llvm::PointerType::getUnqual(Ops[1]->getType()));
5861  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5862  }
5863  case NEON::BI__builtin_neon_vld3_dup_v:
5864  case NEON::BI__builtin_neon_vld3q_dup_v: {
5865  llvm::Type *PTy =
5866  llvm::PointerType::getUnqual(VTy->getElementType());
5867  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5868  llvm::Type *Tys[2] = { VTy, PTy };
5869  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
5870  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
5871  Ops[0] = Builder.CreateBitCast(Ops[0],
5872  llvm::PointerType::getUnqual(Ops[1]->getType()));
5873  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5874  }
5875  case NEON::BI__builtin_neon_vld4_dup_v:
5876  case NEON::BI__builtin_neon_vld4q_dup_v: {
5877  llvm::Type *PTy =
5878  llvm::PointerType::getUnqual(VTy->getElementType());
5879  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5880  llvm::Type *Tys[2] = { VTy, PTy };
5881  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
5882  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
5883  Ops[0] = Builder.CreateBitCast(Ops[0],
5884  llvm::PointerType::getUnqual(Ops[1]->getType()));
5885  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5886  }
5887  case NEON::BI__builtin_neon_vld2_lane_v:
5888  case NEON::BI__builtin_neon_vld2q_lane_v: {
5889  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
5890  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
5891  Ops.push_back(Ops[1]);
5892  Ops.erase(Ops.begin()+1);
5893  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5894  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5895  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
5896  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
5897  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5898  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5899  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5900  }
5901  case NEON::BI__builtin_neon_vld3_lane_v:
5902  case NEON::BI__builtin_neon_vld3q_lane_v: {
5903  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
5904  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
5905  Ops.push_back(Ops[1]);
5906  Ops.erase(Ops.begin()+1);
5907  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5908  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5909  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
5910  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
5911  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
5912  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5913  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5914  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5915  }
5916  case NEON::BI__builtin_neon_vld4_lane_v:
5917  case NEON::BI__builtin_neon_vld4q_lane_v: {
5918  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
5919  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
5920  Ops.push_back(Ops[1]);
5921  Ops.erase(Ops.begin()+1);
5922  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5923  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5924  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
5925  Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
5926  Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
5927  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
5928  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5929  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5930  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5931  }
5932  case NEON::BI__builtin_neon_vst2_v:
5933  case NEON::BI__builtin_neon_vst2q_v: {
5934  Ops.push_back(Ops[0]);
5935  Ops.erase(Ops.begin());
5936  llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
5937  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
5938  Ops, "");
5939  }
5940  case NEON::BI__builtin_neon_vst2_lane_v:
5941  case NEON::BI__builtin_neon_vst2q_lane_v: {
5942  Ops.push_back(Ops[0]);
5943  Ops.erase(Ops.begin());
5944  Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
5945  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
5946  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
5947  Ops, "");
5948  }
5949  case NEON::BI__builtin_neon_vst3_v:
5950  case NEON::BI__builtin_neon_vst3q_v: {
5951  Ops.push_back(Ops[0]);
5952  Ops.erase(Ops.begin());
5953  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
5954  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
5955  Ops, "");
5956  }
5957  case NEON::BI__builtin_neon_vst3_lane_v:
5958  case NEON::BI__builtin_neon_vst3q_lane_v: {
5959  Ops.push_back(Ops[0]);
5960  Ops.erase(Ops.begin());
5961  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
5962  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
5963  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
5964  Ops, "");
5965  }
5966  case NEON::BI__builtin_neon_vst4_v:
5967  case NEON::BI__builtin_neon_vst4q_v: {
5968  Ops.push_back(Ops[0]);
5969  Ops.erase(Ops.begin());
5970  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
5971  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
5972  Ops, "");
5973  }
5974  case NEON::BI__builtin_neon_vst4_lane_v:
5975  case NEON::BI__builtin_neon_vst4q_lane_v: {
5976  Ops.push_back(Ops[0]);
5977  Ops.erase(Ops.begin());
5978  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
5979  llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
5980  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
5981  Ops, "");
5982  }
5983  case NEON::BI__builtin_neon_vtrn_v:
5984  case NEON::BI__builtin_neon_vtrnq_v: {
5985  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5986  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5987  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5988  Value *SV = nullptr;
5989 
5990  for (unsigned vi = 0; vi != 2; ++vi) {
5991  SmallVector<llvm::Constant*, 16> Indices;
5992  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5993  Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
5994  Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
5995  }
5996  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5997  SV = llvm::ConstantVector::get(Indices);
5998  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
5999  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6000  }
6001  return SV;
6002  }
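  // Worked example: with e == 4 the loop above builds the masks <0,4,2,6>
  // (vi == 0) and <1,5,3,7> (vi == 1), the TRN1/TRN2 interleavings; each
  // shuffle result is stored to consecutive slots of the sret-style pointer
  // in Ops[0].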
6003  case NEON::BI__builtin_neon_vuzp_v:
6004  case NEON::BI__builtin_neon_vuzpq_v: {
6005  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6006  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6007  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6008  Value *SV = nullptr;
6009 
6010  for (unsigned vi = 0; vi != 2; ++vi) {
6011  SmallVector<llvm::Constant*, 16> Indices;
6012  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6013  Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
6014 
6015  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6016  SV = llvm::ConstantVector::get(Indices);
6017  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
6018  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6019  }
6020  return SV;
6021  }
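  // Worked example: with e == 4 the masks are <0,2,4,6> and <1,3,5,7>, the
  // even/odd de-interleavings corresponding to UZP1/UZP2.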
6022  case NEON::BI__builtin_neon_vzip_v:
6023  case NEON::BI__builtin_neon_vzipq_v: {
6024  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6025  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6026  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6027  Value *SV = nullptr;
6028 
6029  for (unsigned vi = 0; vi != 2; ++vi) {
6030  SmallVector<llvm::Constant*, 16> Indices;
6031  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6032  Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
6033  Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
6034  }
6035  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6036  SV = llvm::ConstantVector::get(Indices);
6037  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
6038  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6039  }
6040  return SV;
6041  }
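  // Worked example: with e == 4 the masks are <0,4,1,5> and <2,6,3,7>,
  // interleaving the low halves and then the high halves as ZIP1/ZIP2 do.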
6042  case NEON::BI__builtin_neon_vqtbl1q_v: {
6043  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6044  Ops, "vtbl1");
6045  }
6046  case NEON::BI__builtin_neon_vqtbl2q_v: {
6047  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6048  Ops, "vtbl2");
6049  }
6050  case NEON::BI__builtin_neon_vqtbl3q_v: {
6051  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6052  Ops, "vtbl3");
6053  }
6054  case NEON::BI__builtin_neon_vqtbl4q_v: {
6055  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6056  Ops, "vtbl4");
6057  }
6058  case NEON::BI__builtin_neon_vqtbx1q_v: {
6059  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6060  Ops, "vtbx1");
6061  }
6062  case NEON::BI__builtin_neon_vqtbx2q_v: {
6063  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6064  Ops, "vtbx2");
6065  }
6066  case NEON::BI__builtin_neon_vqtbx3q_v: {
6067  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6068  Ops, "vtbx3");
6069  }
6070  case NEON::BI__builtin_neon_vqtbx4q_v: {
6071  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6072  Ops, "vtbx4");
6073  }
6074  case NEON::BI__builtin_neon_vsqadd_v:
6075  case NEON::BI__builtin_neon_vsqaddq_v: {
6076  Int = Intrinsic::aarch64_neon_usqadd;
6077  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6078  }
6079  case NEON::BI__builtin_neon_vuqadd_v:
6080  case NEON::BI__builtin_neon_vuqaddq_v: {
6081  Int = Intrinsic::aarch64_neon_suqadd;
6082  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6083  }
6084  }
6085 }
6086 
6087 llvm::Value *CodeGenFunction::
6088 BuildVector(ArrayRef<llvm::Value*> Ops) {
6089  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
6090  "Not a power-of-two sized vector!");
6091  bool AllConstants = true;
6092  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
6093  AllConstants &= isa<Constant>(Ops[i]);
6094 
6095  // If this is a constant vector, create a ConstantVector.
6096  if (AllConstants) {
6097  SmallVector<llvm::Constant*, 16> CstOps;
6098  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6099  CstOps.push_back(cast<Constant>(Ops[i]));
6100  return llvm::ConstantVector::get(CstOps);
6101  }
6102 
6103  // Otherwise, insertelement the values to build the vector.
6104  Value *Result =
6105  llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
6106 
6107  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6108  Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
6109 
6110  return Result;
6111 }
6112 
6113 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
6114  const CallExpr *E) {
6115  if (BuiltinID == X86::BI__builtin_ms_va_start ||
6116  BuiltinID == X86::BI__builtin_ms_va_end)
6117  return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
6118  BuiltinID == X86::BI__builtin_ms_va_start);
6119  if (BuiltinID == X86::BI__builtin_ms_va_copy) {
6120  // Lower this manually. We can't reliably determine whether or not any
6121  // given va_copy() is for a Win64 va_list from the calling convention
6122  // alone, because it's legal to do this from a System V ABI function.
6123  // With opaque pointer types, we won't have enough information in LLVM
6124  // IR to determine this from the argument types, either. Best to do it
6125  // now, while we have enough information.
6126  Address DestAddr = EmitMSVAListRef(E->getArg(0));
6127  Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6128 
6129  llvm::Type *BPP = Int8PtrPtrTy;
6130 
6131  DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
6132  DestAddr.getAlignment());
6133  SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
6134  SrcAddr.getAlignment());
6135 
6136  Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6137  return Builder.CreateStore(ArgPtr, DestAddr);
6138  }
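  // Sketch of the resulting IR: a Win64 va_list is a single char*, so the
  // copy reduces to one pointer-sized load and store:
  //   %ap.val = load i8*, i8** %ap
  //   store i8* %ap.val, i8** %cp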
6139 
6140  SmallVector<Value*, 4> Ops;
6141 
6142  // Find out if any arguments are required to be integer constant expressions.
6143  unsigned ICEArguments = 0;
6144  ASTContext::GetBuiltinTypeError Error;
6145  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6146  assert(Error == ASTContext::GE_None && "Should not codegen an error");
6147 
6148  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
6149  // If this is a normal argument, just emit it as a scalar.
6150  if ((ICEArguments & (1 << i)) == 0) {
6151  Ops.push_back(EmitScalarExpr(E->getArg(i)));
6152  continue;
6153  }
6154 
6155  // If this is required to be a constant, constant fold it so that we know
6156  // that the generated intrinsic gets a ConstantInt.
6157  llvm::APSInt Result;
6158  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
6159  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
6160  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
6161  }
6162 
6163  switch (BuiltinID) {
6164  default: return nullptr;
6165  case X86::BI__builtin_cpu_supports: {
6166  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
6167  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
6168 
6169  // TODO: When/if this becomes more than x86-specific, use a TargetInfo-based
6170  // mapping.
6171  // Processor features and mapping to processor feature value.
6172  enum X86Features {
6173  CMOV = 0,
6174  MMX,
6175  POPCNT,
6176  SSE,
6177  SSE2,
6178  SSE3,
6179  SSSE3,
6180  SSE4_1,
6181  SSE4_2,
6182  AVX,
6183  AVX2,
6184  SSE4_A,
6185  FMA4,
6186  XOP,
6187  FMA,
6188  AVX512F,
6189  BMI,
6190  BMI2,
6191  MAX
6192  };
6193 
6194  X86Features Feature = StringSwitch<X86Features>(FeatureStr)
6195  .Case("cmov", X86Features::CMOV)
6196  .Case("mmx", X86Features::MMX)
6197  .Case("popcnt", X86Features::POPCNT)
6198  .Case("sse", X86Features::SSE)
6199  .Case("sse2", X86Features::SSE2)
6200  .Case("sse3", X86Features::SSE3)
6201  .Case("sse4.1", X86Features::SSE4_1)
6202  .Case("sse4.2", X86Features::SSE4_2)
6203  .Case("avx", X86Features::AVX)
6204  .Case("avx2", X86Features::AVX2)
6205  .Case("sse4a", X86Features::SSE4_A)
6206  .Case("fma4", X86Features::FMA4)
6207  .Case("xop", X86Features::XOP)
6208  .Case("fma", X86Features::FMA)
6209  .Case("avx512f", X86Features::AVX512F)
6210  .Case("bmi", X86Features::BMI)
6211  .Case("bmi2", X86Features::BMI2)
6212  .Default(X86Features::MAX);
6213  assert(Feature != X86Features::MAX && "Invalid feature!");
6214 
6215  // Matching the struct layout from the compiler-rt/libgcc structure that is
6216  // filled in:
6217  // unsigned int __cpu_vendor;
6218  // unsigned int __cpu_type;
6219  // unsigned int __cpu_subtype;
6220  // unsigned int __cpu_features[1];
6221  llvm::Type *STy = llvm::StructType::get(
6222  Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
6223 
6224  // Grab the global __cpu_model.
6225  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
6226 
6227  // Grab the first (0th) element of the __cpu_features field of the
6228  // __cpu_model global, whose type is STy.
6229  Value *Idxs[] = {
6230  ConstantInt::get(Int32Ty, 0),
6231  ConstantInt::get(Int32Ty, 3),
6232  ConstantInt::get(Int32Ty, 0)
6233  };
6234  Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
6235  Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
6236  CharUnits::fromQuantity(4));
6237 
6238  // Check the value of the bit corresponding to the feature requested.
6239  Value *Bitset = Builder.CreateAnd(
6240  Features, llvm::ConstantInt::get(Int32Ty, 1 << Feature));
6241  return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
6242  }
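  // Worked example (the constants follow from the enum above): for
  // __builtin_cpu_supports("avx"), Feature is AVX (9), the GEP indices
  // {0, 3, 0} address __cpu_model.__cpu_features[0], and the test is roughly
  //   %f = load i32, i32* %cpu_features
  //   %b = and i32 %f, 512          ; 1 << 9
  //   %r = icmp ne i32 %b, 0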
6243  case X86::BI_mm_prefetch: {
6244  Value *Address = Ops[0];
6245  Value *RW = ConstantInt::get(Int32Ty, 0);
6246  Value *Locality = Ops[1];
6247  Value *Data = ConstantInt::get(Int32Ty, 1);
6248  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
6249  return Builder.CreateCall(F, {Address, RW, Locality, Data});
6250  }
6251  case X86::BI__builtin_ia32_undef128:
6252  case X86::BI__builtin_ia32_undef256:
6253  case X86::BI__builtin_ia32_undef512:
6254  return UndefValue::get(ConvertType(E->getType()));
6255  case X86::BI__builtin_ia32_vec_init_v8qi:
6256  case X86::BI__builtin_ia32_vec_init_v4hi:
6257  case X86::BI__builtin_ia32_vec_init_v2si:
6258  return Builder.CreateBitCast(BuildVector(Ops),
6259  llvm::Type::getX86_MMXTy(getLLVMContext()));
6260  case X86::BI__builtin_ia32_vec_ext_v2si:
6261  return Builder.CreateExtractElement(Ops[0],
6262  llvm::ConstantInt::get(Ops[1]->getType(), 0));
6263  case X86::BI__builtin_ia32_ldmxcsr: {
6264  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6265  Builder.CreateStore(Ops[0], Tmp);
6266  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
6267  Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6268  }
6269  case X86::BI__builtin_ia32_stmxcsr: {
6270  Address Tmp = CreateMemTemp(E->getType());
6271  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
6272  Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6273  return Builder.CreateLoad(Tmp, "stmxcsr");
6274  }
6275  case X86::BI__builtin_ia32_xsave:
6276  case X86::BI__builtin_ia32_xsave64:
6277  case X86::BI__builtin_ia32_xrstor:
6278  case X86::BI__builtin_ia32_xrstor64:
6279  case X86::BI__builtin_ia32_xsaveopt:
6280  case X86::BI__builtin_ia32_xsaveopt64:
6281  case X86::BI__builtin_ia32_xrstors:
6282  case X86::BI__builtin_ia32_xrstors64:
6283  case X86::BI__builtin_ia32_xsavec:
6284  case X86::BI__builtin_ia32_xsavec64:
6285  case X86::BI__builtin_ia32_xsaves:
6286  case X86::BI__builtin_ia32_xsaves64: {
6287  Intrinsic::ID ID;
6288 #define INTRINSIC_X86_XSAVE_ID(NAME) \
6289  case X86::BI__builtin_ia32_##NAME: \
6290  ID = Intrinsic::x86_##NAME; \
6291  break
6292  switch (BuiltinID) {
6293  default: llvm_unreachable("Unsupported intrinsic!");
6294  INTRINSIC_X86_XSAVE_ID(xsave);
6295  INTRINSIC_X86_XSAVE_ID(xsave64);
6296  INTRINSIC_X86_XSAVE_ID(xrstor);
6297  INTRINSIC_X86_XSAVE_ID(xrstor64);
6298  INTRINSIC_X86_XSAVE_ID(xsaveopt);
6299  INTRINSIC_X86_XSAVE_ID(xsaveopt64);
6300  INTRINSIC_X86_XSAVE_ID(xrstors);
6301  INTRINSIC_X86_XSAVE_ID(xrstors64);
6302  INTRINSIC_X86_XSAVE_ID(xsavec);
6303  INTRINSIC_X86_XSAVE_ID(xsavec64);
6304  INTRINSIC_X86_XSAVE_ID(xsaves);
6305  INTRINSIC_X86_XSAVE_ID(xsaves64);
6306  }
6307 #undef INTRINSIC_X86_XSAVE_ID
6308  Value *Mhi = Builder.CreateTrunc(
6309  Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
6310  Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
6311  Ops[1] = Mhi;
6312  Ops.push_back(Mlo);
6313  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
6314  }
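  // Sketch: the 64-bit mask in Ops[1] is split into the edx:eax pair the
  // XSAVE family expects, so e.g. __builtin_ia32_xsave becomes roughly
  //   call void @llvm.x86.xsave(i8* %buf, i32 %hi, i32 %lo)
  // with %hi = trunc(mask >> 32) and %lo = trunc(mask).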
6315  case X86::BI__builtin_ia32_storehps:
6316  case X86::BI__builtin_ia32_storelps: {
6317  llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
6318  llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
6319 
6320  // cast val to v2i64
6321  Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
6322 
6323  // extract (0, 1)
6324  unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
6325  llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
6326  Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
6327 
6328  // cast pointer to i64 & store
6329  Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
6330  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6331  }
6332  case X86::BI__builtin_ia32_palignr128:
6333  case X86::BI__builtin_ia32_palignr256: {
6334  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
6335 
6336  unsigned NumElts =
6337  cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
6338  assert(NumElts % 16 == 0);
6339  unsigned NumLanes = NumElts / 16;
6340  unsigned NumLaneElts = NumElts / NumLanes;
6341 
6342  // If palignr is shifting the pair of vectors more than the size of two
6343  // lanes, emit zero.
6344  if (ShiftVal >= (2 * NumLaneElts))
6345  return llvm::Constant::getNullValue(ConvertType(E->getType()));
6346 
6347  // If palignr is shifting the pair of input vectors more than one lane,
6348  // but less than two lanes, convert to shifting in zeroes.
6349  if (ShiftVal > NumLaneElts) {
6350  ShiftVal -= NumLaneElts;
6351  Ops[1] = Ops[0];
6352  Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
6353  }
6354 
6355  uint32_t Indices[32];
6356  // 256-bit palignr operates on 128-bit lanes so we need to handle that
6357  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
6358  for (unsigned i = 0; i != NumLaneElts; ++i) {
6359  unsigned Idx = ShiftVal + i;
6360  if (Idx >= NumLaneElts)
6361  Idx += NumElts - NumLaneElts; // End of lane, switch operand.
6362  Indices[l + i] = Idx + l;
6363  }
6364  }
6365 
6366  Value *SV = llvm::ConstantDataVector::get(getLLVMContext(),
6367  makeArrayRef(Indices, NumElts));
6368  return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
6369  }
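  // Worked example: for the 128-bit form with ShiftVal == 4 and <16 x i8>
  // operands, the mask is <4,5,...,15,16,17,18,19> over the concatenated
  // (Ops[1], Ops[0]) pair: bytes 4..15 of the low operand followed by bytes
  // 0..3 of the high operand, i.e. the byte-wise right shift palignr defines.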
6370  case X86::BI__builtin_ia32_pslldqi256: {
6371  // Shift value is in bits so divide by 8.
6372  unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
6373 
6374  // If pslldq is shifting the vector more than 15 bytes, emit zero.
6375  if (shiftVal >= 16)
6376  return llvm::Constant::getNullValue(ConvertType(E->getType()));
6377 
6378  uint32_t Indices[32];
6379  // 256-bit pslldq operates on 128-bit lanes so we need to handle that
6380  for (unsigned l = 0; l != 32; l += 16) {
6381  for (unsigned i = 0; i != 16; ++i) {
6382  unsigned Idx = 32 + i - shiftVal;
6383  if (Idx < 32) Idx -= 16; // end of lane, switch operand.
6384  Indices[l + i] = Idx + l;
6385  }
6386  }
6387 
6388  llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
6389  Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
6390  Value *Zero = llvm::Constant::getNullValue(VecTy);
6391 
6392  Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6393  SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq");
6394  llvm::Type *ResultType = ConvertType(E->getType());
6395  return Builder.CreateBitCast(SV, ResultType, "cast");
6396  }
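  // Worked example: for shiftVal == 4 the first lane's mask entries are
  // <12,13,14,15, 32,33,...,43> over the (Zero, Ops[0]) pair: four zero
  // bytes, then the lane's first twelve data bytes, i.e. a four-byte left
  // shift within the lane.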
6397  case X86::BI__builtin_ia32_psrldqi256: {
6398  // Shift value is in bits so divide by 8.
6399  unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
6400 
6401  // If psrldq is shifting the vector more than 15 bytes, emit zero.
6402  if (shiftVal >= 16)
6403  return llvm::Constant::getNullValue(ConvertType(E->getType()));
6404 
6405  uint32_t Indices[32];
6406  // 256-bit psrldq operates on 128-bit lanes so we need to handle that
6407  for (unsigned l = 0; l != 32; l += 16) {
6408  for (unsigned i = 0; i != 16; ++i) {
6409  unsigned Idx = i + shiftVal;
6410  if (Idx >= 16) Idx += 16; // end of lane, switch operand.
6411  Indices[l + i] = Idx + l;
6412  }
6413  }
6414 
6415  llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
6416  Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
6417  Value *Zero = llvm::Constant::getNullValue(VecTy);
6418 
6419  Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6420  SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq");
6421  llvm::Type *ResultType = ConvertType(E->getType());
6422  return Builder.CreateBitCast(SV, ResultType, "cast");
6423  }
6424  case X86::BI__builtin_ia32_movntps:
6425  case X86::BI__builtin_ia32_movntps256:
6426  case X86::BI__builtin_ia32_movntpd:
6427  case X86::BI__builtin_ia32_movntpd256:
6428  case X86::BI__builtin_ia32_movntdq:
6429  case X86::BI__builtin_ia32_movntdq256:
6430  case X86::BI__builtin_ia32_movnti:
6431  case X86::BI__builtin_ia32_movnti64: {
6432  llvm::MDNode *Node = llvm::MDNode::get(
6433  getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
6434 
6435  // Convert the type of the pointer to a pointer to the stored type.
6436  Value *BC = Builder.CreateBitCast(Ops[0],
6437  llvm::PointerType::getUnqual(Ops[1]->getType()),
6438  "cast");
6439  StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
6440  SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
6441 
6442  // If the operand is an integer, we can't assume alignment. Otherwise,
6443  // assume natural alignment.
6444  QualType ArgTy = E->getArg(1)->getType();
6445  unsigned Align;
6446  if (ArgTy->isIntegerType())
6447  Align = 1;
6448  else
6449  Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
6450  SI->setAlignment(Align);
6451  return SI;
6452  }
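  // Approximate emitted IR for the movntps case:
  //   store <4 x float> %val, <4 x float>* %p, align 16, !nontemporal !0
  //   !0 = !{i32 1}
  // The !nontemporal metadata lets the backend pick a streaming store such
  // as MOVNTPS.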
6453  // 3DNow!
6454  case X86::BI__builtin_ia32_pswapdsf:
6455  case X86::BI__builtin_ia32_pswapdsi: {
6456  llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
6457  Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
6458  llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
6459  return Builder.CreateCall(F, Ops, "pswapd");
6460  }
6461  case X86::BI__builtin_ia32_rdrand16_step:
6462  case X86::BI__builtin_ia32_rdrand32_step:
6463  case X86::BI__builtin_ia32_rdrand64_step:
6464  case X86::BI__builtin_ia32_rdseed16_step:
6465  case X86::BI__builtin_ia32_rdseed32_step:
6466  case X86::BI__builtin_ia32_rdseed64_step: {
6467  Intrinsic::ID ID;
6468  switch (BuiltinID) {
6469  default: llvm_unreachable("Unsupported intrinsic!");
6470  case X86::BI__builtin_ia32_rdrand16_step:
6471  ID = Intrinsic::x86_rdrand_16;
6472  break;
6473  case X86::BI__builtin_ia32_rdrand32_step:
6474  ID = Intrinsic::x86_rdrand_32;
6475  break;
6476  case X86::BI__builtin_ia32_rdrand64_step:
6477  ID = Intrinsic::x86_rdrand_64;
6478  break;
6479  case X86::BI__builtin_ia32_rdseed16_step:
6480  ID = Intrinsic::x86_rdseed_16;
6481  break;
6482  case X86::BI__builtin_ia32_rdseed32_step:
6483  ID = Intrinsic::x86_rdseed_32;
6484  break;
6485  case X86::BI__builtin_ia32_rdseed64_step:
6486  ID = Intrinsic::x86_rdseed_64;
6487  break;
6488  }
6489 
6490  Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
6491  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
6492  Ops[0]);
6493  return Builder.CreateExtractValue(Call, 1);
6494  }
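  // Sketch: each rdrand/rdseed intrinsic returns the random value paired
  // with a success flag; the value is stored through Ops[0] and the flag is
  // the builtin's result:
  //   %pair = call { i32, i32 } @llvm.x86.rdrand.32()
  //   %val  = extractvalue { i32, i32 } %pair, 0
  //   store i32 %val, i32* %out
  //   %ok   = extractvalue { i32, i32 } %pair, 1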
6495  // SSE comparison intrinsics
6496  case X86::BI__builtin_ia32_cmpeqps:
6497  case X86::BI__builtin_ia32_cmpltps:
6498  case X86::BI__builtin_ia32_cmpleps:
6499  case X86::BI__builtin_ia32_cmpunordps:
6500  case X86::BI__builtin_ia32_cmpneqps:
6501  case X86::BI__builtin_ia32_cmpnltps:
6502  case X86::BI__builtin_ia32_cmpnleps:
6503  case X86::BI__builtin_ia32_cmpordps:
6504  case X86::BI__builtin_ia32_cmpeqss:
6505  case X86::BI__builtin_ia32_cmpltss:
6506  case X86::BI__builtin_ia32_cmpless:
6507  case X86::BI__builtin_ia32_cmpunordss:
6508  case X86::BI__builtin_ia32_cmpneqss:
6509  case X86::BI__builtin_ia32_cmpnltss:
6510  case X86::BI__builtin_ia32_cmpnless:
6511  case X86::BI__builtin_ia32_cmpordss:
6512  case X86::BI__builtin_ia32_cmpeqpd:
6513  case X86::BI__builtin_ia32_cmpltpd:
6514  case X86::BI__builtin_ia32_cmplepd:
6515  case X86::BI__builtin_ia32_cmpunordpd:
6516  case X86::BI__builtin_ia32_cmpneqpd:
6517  case X86::BI__builtin_ia32_cmpnltpd:
6518  case X86::BI__builtin_ia32_cmpnlepd:
6519  case X86::BI__builtin_ia32_cmpordpd:
6520  case X86::BI__builtin_ia32_cmpeqsd:
6521  case X86::BI__builtin_ia32_cmpltsd:
6522  case X86::BI__builtin_ia32_cmplesd:
6523  case X86::BI__builtin_ia32_cmpunordsd:
6524  case X86::BI__builtin_ia32_cmpneqsd:
6525  case X86::BI__builtin_ia32_cmpnltsd:
6526  case X86::BI__builtin_ia32_cmpnlesd:
6527  case X86::BI__builtin_ia32_cmpordsd:
6528  // These exist so that the builtin that takes an immediate can be bounds
6529  // checked by clang to avoid passing bad immediates to the backend. Since
6530  // AVX has a larger immediate than SSE, we would need separate builtins to
6531  // do the different bounds checking. Rather than create a clang-specific
6532  // SSE-only builtin, this implements eight separate builtins to match the
6533  // gcc implementation.
6534 
6535  // Choose the immediate.
6536  unsigned Imm;
6537  switch (BuiltinID) {
6538  default: llvm_unreachable("Unsupported intrinsic!");
6539  case X86::BI__builtin_ia32_cmpeqps:
6540  case X86::BI__builtin_ia32_cmpeqss:
6541  case X86::BI__builtin_ia32_cmpeqpd:
6542  case X86::BI__builtin_ia32_cmpeqsd:
6543  Imm = 0;
6544  break;
6545  case X86::BI__builtin_ia32_cmpltps:
6546  case X86::BI__builtin_ia32_cmpltss:
6547  case X86::BI__builtin_ia32_cmpltpd:
6548  case X86::BI__builtin_ia32_cmpltsd:
6549  Imm = 1;
6550  break;
6551  case X86::BI__builtin_ia32_cmpleps:
6552  case X86::BI__builtin_ia32_cmpless:
6553  case X86::BI__builtin_ia32_cmplepd:
6554  case X86::BI__builtin_ia32_cmplesd:
6555  Imm = 2;
6556  break;
6557  case X86::BI__builtin_ia32_cmpunordps:
6558  case X86::BI__builtin_ia32_cmpunordss:
6559  case X86::BI__builtin_ia32_cmpunordpd:
6560  case X86::BI__builtin_ia32_cmpunordsd:
6561  Imm = 3;
6562  break;
6563  case X86::BI__builtin_ia32_cmpneqps:
6564  case X86::BI__builtin_ia32_cmpneqss:
6565  case X86::BI__builtin_ia32_cmpneqpd:
6566  case X86::BI__builtin_ia32_cmpneqsd:
6567  Imm = 4;
6568  break;
6569  case X86::BI__builtin_ia32_cmpnltps:
6570  case X86::BI__builtin_ia32_cmpnltss:
6571  case X86::BI__builtin_ia32_cmpnltpd:
6572  case X86::BI__builtin_ia32_cmpnltsd:
6573  Imm = 5;
6574  break;
6575  case X86::BI__builtin_ia32_cmpnleps:
6576  case X86::BI__builtin_ia32_cmpnless:
6577  case X86::BI__builtin_ia32_cmpnlepd:
6578  case X86::BI__builtin_ia32_cmpnlesd:
6579  Imm = 6;
6580  break;
6581  case X86::BI__builtin_ia32_cmpordps:
6582  case X86::BI__builtin_ia32_cmpordss:
6583  case X86::BI__builtin_ia32_cmpordpd:
6584  case X86::BI__builtin_ia32_cmpordsd:
6585  Imm = 7;
6586  break;
6587  }
6588 
6589  // Choose the intrinsic ID.
6590  const char *name;
6591  Intrinsic::ID ID;
6592  switch (BuiltinID) {
6593  default: llvm_unreachable("Unsupported intrinsic!");
6594  case X86::BI__builtin_ia32_cmpeqps:
6595  case X86::BI__builtin_ia32_cmpltps:
6596  case X86::BI__builtin_ia32_cmpleps:
6597  case X86::BI__builtin_ia32_cmpunordps:
6598  case X86::BI__builtin_ia32_cmpneqps:
6599  case X86::BI__builtin_ia32_cmpnltps:
6600  case X86::BI__builtin_ia32_cmpnleps:
6601  case X86::BI__builtin_ia32_cmpordps:
6602  name = "cmpps";
6603  ID = Intrinsic::x86_sse_cmp_ps;
6604  break;
6605  case X86::BI__builtin_ia32_cmpeqss:
6606  case X86::BI__builtin_ia32_cmpltss:
6607  case X86::BI__builtin_ia32_cmpless:
6608  case X86::BI__builtin_ia32_cmpunordss:
6609  case X86::BI__builtin_ia32_cmpneqss:
6610  case X86::BI__builtin_ia32_cmpnltss:
6611  case X86::BI__builtin_ia32_cmpnless:
6612  case X86::BI__builtin_ia32_cmpordss:
6613  name = "cmpss";
6614  ID = Intrinsic::x86_sse_cmp_ss;
6615  break;
6616  case X86::BI__builtin_ia32_cmpeqpd:
6617  case X86::BI__builtin_ia32_cmpltpd:
6618  case X86::BI__builtin_ia32_cmplepd:
6619  case X86::BI__builtin_ia32_cmpunordpd:
6620  case X86::BI__builtin_ia32_cmpneqpd:
6621  case X86::BI__builtin_ia32_cmpnltpd:
6622  case X86::BI__builtin_ia32_cmpnlepd:
6623  case X86::BI__builtin_ia32_cmpordpd:
6624  name = "cmppd";
6625  ID = Intrinsic::x86_sse2_cmp_pd;
6626  break;
6627  case X86::BI__builtin_ia32_cmpeqsd:
6628  case X86::BI__builtin_ia32_cmpltsd:
6629  case X86::BI__builtin_ia32_cmplesd:
6630  case X86::BI__builtin_ia32_cmpunordsd:
6631  case X86::BI__builtin_ia32_cmpneqsd:
6632  case X86::BI__builtin_ia32_cmpnltsd:
6633  case X86::BI__builtin_ia32_cmpnlesd:
6634  case X86::BI__builtin_ia32_cmpordsd:
6635  name = "cmpsd";
6636  ID = Intrinsic::x86_sse2_cmp_sd;
6637  break;
6638  }
6639 
6640  Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
6641  llvm::Function *F = CGM.getIntrinsic(ID);
6642  return Builder.CreateCall(F, Ops, name);
6643  }
6644 }
6645 
6646 
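To make the mapping above concrete, here is a caller-side sketch (illustrative helper name; assumes an SSE-enabled x86 target). The "lt" packed-single variant selects Imm = 1 and the llvm.x86.sse.cmp.ps intrinsic, so the call below is emitted as a single intrinsic call carrying an i8 immediate:

#include <xmmintrin.h>

/* Illustrative only, not part of CGBuiltin.cpp. Emitted as:
     call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a, <4 x float> %b, i8 1) */
static __m128 less_than(__m128 a, __m128 b) {
  return __builtin_ia32_cmpltps(a, b);
}
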
6647 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
6648  const CallExpr *E) {
6649  SmallVector<Value*, 4> Ops;
6650 
6651  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
6652  Ops.push_back(EmitScalarExpr(E->getArg(i)));
6653 
6654  Intrinsic::ID ID = Intrinsic::not_intrinsic;
6655 
6656  switch (BuiltinID) {
6657  default: return nullptr;
6658 
6659  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
6660  // call __builtin_readcyclecounter.
6661  case PPC::BI__builtin_ppc_get_timebase:
6662  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
6663 
6664  // vec_ld, vec_lvsl, vec_lvsr
6665  case PPC::BI__builtin_altivec_lvx:
6666  case PPC::BI__builtin_altivec_lvxl:
6667  case PPC::BI__builtin_altivec_lvebx:
6668  case PPC::BI__builtin_altivec_lvehx:
6669  case PPC::BI__builtin_altivec_lvewx:
6670  case PPC::BI__builtin_altivec_lvsl:
6671  case PPC::BI__builtin_altivec_lvsr:
6672  case PPC::BI__builtin_vsx_lxvd2x:
6673  case PPC::BI__builtin_vsx_lxvw4x:
6674  {
6675  Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
6676 
6677  Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
6678  Ops.pop_back();
6679 
6680  switch (BuiltinID) {
6681  default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
6682  case PPC::BI__builtin_altivec_lvx:
6683  ID = Intrinsic::ppc_altivec_lvx;
6684  break;
6685  case PPC::BI__builtin_altivec_lvxl:
6686  ID = Intrinsic::ppc_altivec_lvxl;
6687  break;
6688  case PPC::BI__builtin_altivec_lvebx:
6689  ID = Intrinsic::ppc_altivec_lvebx;
6690  break;
6691  case PPC::BI__builtin_altivec_lvehx:
6692  ID = Intrinsic::ppc_altivec_lvehx;
6693  break;
6694  case PPC::BI__builtin_altivec_lvewx:
6695  ID = Intrinsic::ppc_altivec_lvewx;
6696  break;
6697  case PPC::BI__builtin_altivec_lvsl:
6698  ID = Intrinsic::ppc_altivec_lvsl;
6699  break;
6700  case PPC::BI__builtin_altivec_lvsr:
6701  ID = Intrinsic::ppc_altivec_lvsr;
6702  break;
6703  case PPC::BI__builtin_vsx_lxvd2x:
6704  ID = Intrinsic::ppc_vsx_lxvd2x;
6705  break;
6706  case PPC::BI__builtin_vsx_lxvw4x:
6707  ID = Intrinsic::ppc_vsx_lxvw4x;
6708  break;
6709  }
6710  llvm::Function *F = CGM.getIntrinsic(ID);
6711  return Builder.CreateCall(F, Ops, "");
6712  }
6713 
6714  // vec_st
6715  case PPC::BI__builtin_altivec_stvx:
6716  case PPC::BI__builtin_altivec_stvxl:
6717  case PPC::BI__builtin_altivec_stvebx:
6718  case PPC::BI__builtin_altivec_stvehx:
6719  case PPC::BI__builtin_altivec_stvewx:
6720  case PPC::BI__builtin_vsx_stxvd2x:
6721  case PPC::BI__builtin_vsx_stxvw4x:
6722  {
6723  Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
6724  Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
6725  Ops.pop_back();
6726 
6727  switch (BuiltinID) {
6728  default: llvm_unreachable("Unsupported st intrinsic!");
6729  case PPC::BI__builtin_altivec_stvx:
6730  ID = Intrinsic::ppc_altivec_stvx;
6731  break;
6732  case PPC::BI__builtin_altivec_stvxl:
6733  ID = Intrinsic::ppc_altivec_stvxl;
6734  break;
6735  case PPC::BI__builtin_altivec_stvebx:
6736  ID = Intrinsic::ppc_altivec_stvebx;
6737  break;
6738  case PPC::BI__builtin_altivec_stvehx:
6739  ID = Intrinsic::ppc_altivec_stvehx;
6740  break;
6741  case PPC::BI__builtin_altivec_stvewx:
6742  ID = Intrinsic::ppc_altivec_stvewx;
6743  break;
6744  case PPC::BI__builtin_vsx_stxvd2x:
6745  ID = Intrinsic::ppc_vsx_stxvd2x;
6746  break;
6747  case PPC::BI__builtin_vsx_stxvw4x:
6748  ID = Intrinsic::ppc_vsx_stxvw4x;
6749  break;
6750  }
6751  llvm::Function *F = CGM.getIntrinsic(ID);
6752  return Builder.CreateCall(F, Ops, "");
6753  }
6754  // Square root
6755  case PPC::BI__builtin_vsx_xvsqrtsp:
6756  case PPC::BI__builtin_vsx_xvsqrtdp: {
6757  llvm::Type *ResultType = ConvertType(E->getType());
6758  Value *X = EmitScalarExpr(E->getArg(0));
6759  ID = Intrinsic::sqrt;
6760  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6761  return Builder.CreateCall(F, X);
6762  }
6763  // Count leading zeros
6764  case PPC::BI__builtin_altivec_vclzb:
6765  case PPC::BI__builtin_altivec_vclzh:
6766  case PPC::BI__builtin_altivec_vclzw:
6767  case PPC::BI__builtin_altivec_vclzd: {
6768  llvm::Type *ResultType = ConvertType(E->getType());
6769  Value *X = EmitScalarExpr(E->getArg(0));
6770  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
6771  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
6772  return Builder.CreateCall(F, {X, Undef});
6773  }
6774  // Copy sign
6775  case PPC::BI__builtin_vsx_xvcpsgnsp:
6776  case PPC::BI__builtin_vsx_xvcpsgndp: {
6777  llvm::Type *ResultType = ConvertType(E->getType());
6778  Value *X = EmitScalarExpr(E->getArg(0));
6779  Value *Y = EmitScalarExpr(E->getArg(1));
6780  ID = Intrinsic::copysign;
6781  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6782  return Builder.CreateCall(F, {X, Y});
6783  }
6784  // Rounding/truncation
6785  case PPC::BI__builtin_vsx_xvrspip:
6786  case PPC::BI__builtin_vsx_xvrdpip:
6787  case PPC::BI__builtin_vsx_xvrdpim:
6788  case PPC::BI__builtin_vsx_xvrspim:
6789  case PPC::BI__builtin_vsx_xvrdpi:
6790  case PPC::BI__builtin_vsx_xvrspi:
6791  case PPC::BI__builtin_vsx_xvrdpic:
6792  case PPC::BI__builtin_vsx_xvrspic:
6793  case PPC::BI__builtin_vsx_xvrdpiz:
6794  case PPC::BI__builtin_vsx_xvrspiz: {
6795  llvm::Type *ResultType = ConvertType(E->getType());
6796  Value *X = EmitScalarExpr(E->getArg(0));
6797  if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
6798  BuiltinID == PPC::BI__builtin_vsx_xvrspim)
6799  ID = Intrinsic::floor;
6800  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
6801  BuiltinID == PPC::BI__builtin_vsx_xvrspi)
6802  ID = Intrinsic::round;
6803  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
6804  BuiltinID == PPC::BI__builtin_vsx_xvrspic)
6805  ID = Intrinsic::nearbyint;
6806  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
6807  BuiltinID == PPC::BI__builtin_vsx_xvrspip)
6808  ID = Intrinsic::ceil;
6809  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
6810  BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
6811  ID = Intrinsic::trunc;
6812  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
6813  return Builder.CreateCall(F, X);
6814  }
6815  // FMA variations
6816  case PPC::BI__builtin_vsx_xvmaddadp:
6817  case PPC::BI__builtin_vsx_xvmaddasp:
6818  case PPC::BI__builtin_vsx_xvnmaddadp:
6819  case PPC::BI__builtin_vsx_xvnmaddasp:
6820  case PPC::BI__builtin_vsx_xvmsubadp:
6821  case PPC::BI__builtin_vsx_xvmsubasp:
6822  case PPC::BI__builtin_vsx_xvnmsubadp:
6823  case PPC::BI__builtin_vsx_xvnmsubasp: {
6824  llvm::Type *ResultType = ConvertType(E->getType());
6825  Value *X = EmitScalarExpr(E->getArg(0));
6826  Value *Y = EmitScalarExpr(E->getArg(1));
6827  Value *Z = EmitScalarExpr(E->getArg(2));
6828  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
6829  llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
6830  switch (BuiltinID) {
6831  case PPC::BI__builtin_vsx_xvmaddadp:
6832  case PPC::BI__builtin_vsx_xvmaddasp:
6833  return Builder.CreateCall(F, {X, Y, Z});
6834  case PPC::BI__builtin_vsx_xvnmaddadp:
6835  case PPC::BI__builtin_vsx_xvnmaddasp:
6836  return Builder.CreateFSub(Zero,
6837  Builder.CreateCall(F, {X, Y, Z}), "sub");
6838  case PPC::BI__builtin_vsx_xvmsubadp:
6839  case PPC::BI__builtin_vsx_xvmsubasp:
6840  return Builder.CreateCall(F,
6841  {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
6842  case PPC::BI__builtin_vsx_xvnmsubadp:
6843  case PPC::BI__builtin_vsx_xvnmsubasp:
6844  Value *FsubRes =
6845  Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
6846  return Builder.CreateFSub(Zero, FsubRes, "sub");
6847  }
6848  llvm_unreachable("Unknown FMA operation");
6849  return nullptr; // Suppress no-return warning
6850  }
6851  }
6852 }
6853 
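The four VSX FMA variants above differ only in sign handling; Zero is the negative zero produced by getZeroValueForNegation, so each CreateFSub is an exact negation. A scalar sketch of the resulting semantics (illustrative helper names; the real builtins operate elementwise on vectors):

#include <math.h>

double vsx_madd (double x, double y, double z) { return  fma(x, y,  z); } /* xvmaddadp  */
double vsx_nmadd(double x, double y, double z) { return -fma(x, y,  z); } /* xvnmaddadp */
double vsx_msub (double x, double y, double z) { return  fma(x, y, -z); } /* xvmsubadp  */
double vsx_nmsub(double x, double y, double z) { return -fma(x, y, -z); } /* xvnmsubadp */
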
6854 // Emit an intrinsic that has 1 float or double operand.
6855 static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF,
6856  const CallExpr *E,
6857  unsigned IntrinsicID) {
6858  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
6859 
6860  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
6861  return CGF.Builder.CreateCall(F, Src0);
6862 }
6863 
6864 // Emit an intrinsic that has 3 float or double operands.
6865 static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF,
6866  const CallExpr *E,
6867  unsigned IntrinsicID) {
6868  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
6869  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
6870  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
6871 
6872  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
6873  return CGF.Builder.CreateCall(F, {Src0, Src1, Src2});
6874 }
6875 
6876 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
6877 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
6878  const CallExpr *E,
6879  unsigned IntrinsicID) {
6880  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
6881  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
6882 
6883  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
6884  return CGF.Builder.CreateCall(F, {Src0, Src1});
6885 }
6886 
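All three helpers above rely on CGM.getIntrinsic(IntrinsicID, Src0->getType()) to resolve the float/double overload of the target intrinsic from the first operand's type. A caller-side sketch (AMDGPU target only; helper names and the exact intrinsic spellings are illustrative):

float  rcp32(float x)  { return __builtin_amdgpu_rcpf(x); } /* lowers to llvm.AMDGPU.rcp.f32 */
double rcp64(double x) { return __builtin_amdgpu_rcp(x);  } /* lowers to llvm.AMDGPU.rcp.f64 */
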
6887 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
6888  const CallExpr *E) {
6889  switch (BuiltinID) {
6890  case AMDGPU::BI__builtin_amdgpu_div_scale:
6891  case AMDGPU::BI__builtin_amdgpu_div_scalef: {
6892  // Translate from the intrinsic's struct return to the builtin's out
6893  // argument.
6894 
6895  Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
6896 
6897  llvm::Value *X = EmitScalarExpr(E->getArg(0));
6898  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
6899  llvm::Value *Z = EmitScalarExpr(E->getArg(2));
6900 
6901  llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
6902  X->getType());
6903 
6904  llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
6905 
6906  llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
6907  llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
6908 
6909  llvm::Type *RealFlagType
6910  = FlagOutPtr.getPointer()->getType()->getPointerElementType();
6911 
6912  llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
6913  Builder.CreateStore(FlagExt, FlagOutPtr);
6914  return Result;
6915  }
6916  case AMDGPU::BI__builtin_amdgpu_div_fmas:
6917  case AMDGPU::BI__builtin_amdgpu_div_fmasf: {
6918  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
6919  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
6920  llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
6921  llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
6922 
6923  llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas,
6924  Src0->getType());
6925  llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
6926  return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
6927  }
6928  case AMDGPU::BI__builtin_amdgpu_div_fixup:
6929  case AMDGPU::BI__builtin_amdgpu_div_fixupf:
6930  return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup);
6931  case AMDGPU::BI__builtin_amdgpu_trig_preop:
6932  case AMDGPU::BI__builtin_amdgpu_trig_preopf:
6933  return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop);
6934  case AMDGPU::BI__builtin_amdgpu_rcp:
6935  case AMDGPU::BI__builtin_amdgpu_rcpf:
6936  return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
6937  case AMDGPU::BI__builtin_amdgpu_rsq:
6938  case AMDGPU::BI__builtin_amdgpu_rsqf:
6939  return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq);
6940  case AMDGPU::BI__builtin_amdgpu_rsq_clamped:
6941  case AMDGPU::BI__builtin_amdgpu_rsq_clampedf:
6942  return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped);
6943  case AMDGPU::BI__builtin_amdgpu_ldexp:
6944  case AMDGPU::BI__builtin_amdgpu_ldexpf:
6945  return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
6946  case AMDGPU::BI__builtin_amdgpu_class:
6947  case AMDGPU::BI__builtin_amdgpu_classf:
6948  return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class);
6949  default:
6950  return nullptr;
6951  }
6952 }
6953 
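A source-level sketch of the div_scale translation above, assuming the builtin's signature here is double(double, double, bool, bool *) (illustrative names; AMDGPU target only). The intrinsic returns a {result, flag} pair; element 1 is zero-extended and stored through the out pointer, and element 0 becomes the call's value:

double div_scale_wrapper(double x, double y, bool flag_in) {
  bool flag;
  /* r    <- extractvalue {double, i1} %pair, 0
     flag <- zext of extractvalue {double, i1} %pair, 1, stored via &flag */
  double r = __builtin_amdgpu_div_scale(x, y, flag_in, &flag);
  return r;
}
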
6954 /// Handle a SystemZ function in which the final argument is a pointer
6955 /// to an int that receives the post-instruction CC value. At the LLVM level
6956 /// this is represented as a function that returns a {result, cc} pair.
6957 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
6958  unsigned IntrinsicID,
6959  const CallExpr *E) {
6960  unsigned NumArgs = E->getNumArgs() - 1;
6961  SmallVector<Value *, 8> Args(NumArgs);
6962  for (unsigned I = 0; I < NumArgs; ++I)
6963  Args[I] = CGF.EmitScalarExpr(E->getArg(I));
6964  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
6965  Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
6966  Value *Call = CGF.Builder.CreateCall(F, Args);
6967  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
6968  CGF.Builder.CreateStore(CC, CCPtr);
6969  return CGF.Builder.CreateExtractValue(Call, 0);
6970 }
6971 
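A caller-side sketch of the CC convention (SystemZ vector target; the vector typedef and the vceqbs signature are assumptions based on the builtin list below). The trailing int * receives element 1 of the intrinsic's {result, cc} pair, and element 0 is the call's value:

typedef __attribute__((vector_size(16))) signed char vschar;

vschar cmp_eq_with_cc(vschar a, vschar b, int *cc) {
  /* Lowered as: %pair = call @llvm.s390.vceqbs(a, b);
     store extractvalue(%pair, 1) to cc; return extractvalue(%pair, 0). */
  return __builtin_s390_vceqbs(a, b, cc);
}
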
6972 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
6973  const CallExpr *E) {
6974  switch (BuiltinID) {
6975  case SystemZ::BI__builtin_tbegin: {
6976  Value *TDB = EmitScalarExpr(E->getArg(0));
6977  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
6978  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
6979  return Builder.CreateCall(F, {TDB, Control});
6980  }
6981  case SystemZ::BI__builtin_tbegin_nofloat: {
6982  Value *TDB = EmitScalarExpr(E->getArg(0));
6983  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
6984  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
6985  return Builder.CreateCall(F, {TDB, Control});
6986  }
6987  case SystemZ::BI__builtin_tbeginc: {
6988  Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
6989  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
6990  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
6991  return Builder.CreateCall(F, {TDB, Control});
6992  }
6993  case SystemZ::BI__builtin_tabort: {
6994  Value *Data = EmitScalarExpr(E->getArg(0));
6995  Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
6996  return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
6997  }
6998  case SystemZ::BI__builtin_non_tx_store: {
6999  Value *Address = EmitScalarExpr(E->getArg(0));
7000  Value *Data = EmitScalarExpr(E->getArg(1));
7001  Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
7002  return Builder.CreateCall(F, {Data, Address});
7003  }
7004 
7005  // Vector builtins. Note that most vector builtins are mapped automatically
7006  // to target-specific LLVM intrinsics. The ones handled specially here can
7007  // be represented via standard LLVM IR, which is preferable to enable common
7008  // LLVM optimizations.
7009 
7010  case SystemZ::BI__builtin_s390_vpopctb:
7011  case SystemZ::BI__builtin_s390_vpopcth:
7012  case SystemZ::BI__builtin_s390_vpopctf:
7013  case SystemZ::BI__builtin_s390_vpopctg: {
7014  llvm::Type *ResultType = ConvertType(E->getType());
7015  Value *X = EmitScalarExpr(E->getArg(0));
7016  Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7017  return Builder.CreateCall(F, X);
7018  }
7019 
7020  case SystemZ::BI__builtin_s390_vclzb:
7021  case SystemZ::BI__builtin_s390_vclzh:
7022  case SystemZ::BI__builtin_s390_vclzf:
7023  case SystemZ::BI__builtin_s390_vclzg: {
7024  llvm::Type *ResultType = ConvertType(E->getType());
7025  Value *X = EmitScalarExpr(E->getArg(0));
7026  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7027  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
7028  return Builder.CreateCall(F, {X, Undef});
7029  }
7030 
7031  case SystemZ::BI__builtin_s390_vctzb:
7032  case SystemZ::BI__builtin_s390_vctzh:
7033  case SystemZ::BI__builtin_s390_vctzf:
7034  case SystemZ::BI__builtin_s390_vctzg: {
7035  llvm::Type *ResultType = ConvertType(E->getType());
7036  Value *X = EmitScalarExpr(E->getArg(0));
7037  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7038  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
7039  return Builder.CreateCall(F, {X, Undef});
7040  }
7041 
7042  case SystemZ::BI__builtin_s390_vfsqdb: {
7043  llvm::Type *ResultType = ConvertType(E->getType());
7044  Value *X = EmitScalarExpr(E->getArg(0));
7045  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
7046  return Builder.CreateCall(F, X);
7047  }
7048  case SystemZ::BI__builtin_s390_vfmadb: {
7049  llvm::Type *ResultType = ConvertType(E->getType());
7050  Value *X = EmitScalarExpr(E->getArg(0));
7051  Value *Y = EmitScalarExpr(E->getArg(1));
7052  Value *Z = EmitScalarExpr(E->getArg(2));
7053  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7054  return Builder.CreateCall(F, {X, Y, Z});
7055  }
7056  case SystemZ::BI__builtin_s390_vfmsdb: {
7057  llvm::Type *ResultType = ConvertType(E->getType());
7058  Value *X = EmitScalarExpr(E->getArg(0));
7059  Value *Y = EmitScalarExpr(E->getArg(1));
7060  Value *Z = EmitScalarExpr(E->getArg(2));
7061  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7062  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7063  return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7064  }
7065  case SystemZ::BI__builtin_s390_vflpdb: {
7066  llvm::Type *ResultType = ConvertType(E->getType());
7067  Value *X = EmitScalarExpr(E->getArg(0));
7068  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7069  return Builder.CreateCall(F, X);
7070  }
7071  case SystemZ::BI__builtin_s390_vflndb: {
7072  llvm::Type *ResultType = ConvertType(E->getType());
7073  Value *X = EmitScalarExpr(E->getArg(0));
7074  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7075  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7076  return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
7077  }
7078  case SystemZ::BI__builtin_s390_vfidb: {
7079  llvm::Type *ResultType = ConvertType(E->getType());
7080  Value *X = EmitScalarExpr(E->getArg(0));
7081  // Constant-fold the M4 and M5 mask arguments.
7082  llvm::APSInt M4, M5;
7083  bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
7084  bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
7085  assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
7086  (void)IsConstM4; (void)IsConstM5;
7087  // Check whether this instance of vfidb can be represented via a LLVM
7088  // standard intrinsic. We only support some combinations of M4 and M5.
7089  Intrinsic::ID ID = Intrinsic::not_intrinsic;
7090  switch (M4.getZExtValue()) {
7091  default: break;
7092  case 0: // IEEE-inexact exception allowed
7093  switch (M5.getZExtValue()) {
7094  default: break;
7095  case 0: ID = Intrinsic::rint; break;
7096  }
7097  break;
7098  case 4: // IEEE-inexact exception suppressed
7099  switch (M5.getZExtValue()) {
7100  default: break;
7101  case 0: ID = Intrinsic::nearbyint; break;
7102  case 1: ID = Intrinsic::round; break;
7103  case 5: ID = Intrinsic::trunc; break;
7104  case 6: ID = Intrinsic::ceil; break;
7105  case 7: ID = Intrinsic::floor; break;
7106  }
7107  break;
7108  }
7109  if (ID != Intrinsic::not_intrinsic) {
7110  Function *F = CGM.getIntrinsic(ID, ResultType);
7111  return Builder.CreateCall(F, X);
7112  }
7113  Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
7114  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
7115  Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
7116  return Builder.CreateCall(F, {X, M4Value, M5Value});
7117  }
7118 
7119  // Vector intrinsics that output the post-instruction CC value.
7120 
7121 #define INTRINSIC_WITH_CC(NAME) \
7122  case SystemZ::BI__builtin_##NAME: \
7123  return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
7124 
7125  INTRINSIC_WITH_CC(s390_vpkshs);
7126  INTRINSIC_WITH_CC(s390_vpksfs);
7127  INTRINSIC_WITH_CC(s390_vpksgs);
7128 
7129  INTRINSIC_WITH_CC(s390_vpklshs);
7130  INTRINSIC_WITH_CC(s390_vpklsfs);
7131  INTRINSIC_WITH_CC(s390_vpklsgs);
7132 
7133  INTRINSIC_WITH_CC(s390_vceqbs);
7134  INTRINSIC_WITH_CC(s390_vceqhs);
7135  INTRINSIC_WITH_CC(s390_vceqfs);
7136  INTRINSIC_WITH_CC(s390_vceqgs);
7137 
7138  INTRINSIC_WITH_CC(s390_vchbs);
7139  INTRINSIC_WITH_CC(s390_vchhs);
7140  INTRINSIC_WITH_CC(s390_vchfs);
7141  INTRINSIC_WITH_CC(s390_vchgs);
7142 
7143  INTRINSIC_WITH_CC(s390_vchlbs);
7144  INTRINSIC_WITH_CC(s390_vchlhs);
7145  INTRINSIC_WITH_CC(s390_vchlfs);
7146  INTRINSIC_WITH_CC(s390_vchlgs);
7147 
7148  INTRINSIC_WITH_CC(s390_vfaebs);
7149  INTRINSIC_WITH_CC(s390_vfaehs);
7150  INTRINSIC_WITH_CC(s390_vfaefs);
7151 
7152  INTRINSIC_WITH_CC(s390_vfaezbs);
7153  INTRINSIC_WITH_CC(s390_vfaezhs);
7154  INTRINSIC_WITH_CC(s390_vfaezfs);
7155 
7156  INTRINSIC_WITH_CC(s390_vfeebs);
7157  INTRINSIC_WITH_CC(s390_vfeehs);
7158  INTRINSIC_WITH_CC(s390_vfeefs);
7159 
7160  INTRINSIC_WITH_CC(s390_vfeezbs);
7161  INTRINSIC_WITH_CC(s390_vfeezhs);
7162  INTRINSIC_WITH_CC(s390_vfeezfs);
7163 
7164  INTRINSIC_WITH_CC(s390_vfenebs);
7165  INTRINSIC_WITH_CC(s390_vfenehs);
7166  INTRINSIC_WITH_CC(s390_vfenefs);
7167 
7168  INTRINSIC_WITH_CC(s390_vfenezbs);
7169  INTRINSIC_WITH_CC(s390_vfenezhs);
7170  INTRINSIC_WITH_CC(s390_vfenezfs);
7171 
7172  INTRINSIC_WITH_CC(s390_vistrbs);
7173  INTRINSIC_WITH_CC(s390_vistrhs);
7174  INTRINSIC_WITH_CC(s390_vistrfs);
7175 
7176  INTRINSIC_WITH_CC(s390_vstrcbs);
7177  INTRINSIC_WITH_CC(s390_vstrchs);
7178  INTRINSIC_WITH_CC(s390_vstrcfs);
7179 
7180  INTRINSIC_WITH_CC(s390_vstrczbs);
7181  INTRINSIC_WITH_CC(s390_vstrczhs);
7182  INTRINSIC_WITH_CC(s390_vstrczfs);
7183 
7184  INTRINSIC_WITH_CC(s390_vfcedbs);
7185  INTRINSIC_WITH_CC(s390_vfchdbs);
7186  INTRINSIC_WITH_CC(s390_vfchedbs);
7187 
7188  INTRINSIC_WITH_CC(s390_vftcidb);
7189 
7190 #undef INTRINSIC_WITH_CC
7191 
7192  default:
7193  return nullptr;
7194  }
7195 }
7196 
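A usage sketch for the vfidb folding above (SystemZ vector target; the vector typedef is illustrative). With (M4, M5) = (4, 1), inexact exceptions suppressed and round-to-nearest with ties away from zero, the table selects the generic llvm.round intrinsic instead of llvm.s390.vfidb:

typedef __attribute__((vector_size(16))) double vdouble;

vdouble round_each(vdouble v) {
  return __builtin_s390_vfidb(v, 4, 1); /* folds to @llvm.round.v2f64 */
}
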
7197 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
7198  const CallExpr *E) {
7199  switch (BuiltinID) {
7200  case NVPTX::BI__nvvm_atom_add_gen_i:
7201  case NVPTX::BI__nvvm_atom_add_gen_l:
7202  case NVPTX::BI__nvvm_atom_add_gen_ll:
7203  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
7204 
7205  case NVPTX::BI__nvvm_atom_sub_gen_i:
7206  case NVPTX::BI__nvvm_atom_sub_gen_l:
7207  case NVPTX::BI__nvvm_atom_sub_gen_ll:
7208  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
7209 
7210  case NVPTX::BI__nvvm_atom_and_gen_i:
7211  case NVPTX::BI__nvvm_atom_and_gen_l:
7212  case NVPTX::BI__nvvm_atom_and_gen_ll:
7213  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
7214 
7215  case NVPTX::BI__nvvm_atom_or_gen_i:
7216  case NVPTX::BI__nvvm_atom_or_gen_l:
7217  case NVPTX::BI__nvvm_atom_or_gen_ll:
7218  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
7219 
7220  case NVPTX::BI__nvvm_atom_xor_gen_i:
7221  case NVPTX::BI__nvvm_atom_xor_gen_l:
7222  case NVPTX::BI__nvvm_atom_xor_gen_ll:
7223  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
7224 
7225  case NVPTX::BI__nvvm_atom_xchg_gen_i:
7226  case NVPTX::BI__nvvm_atom_xchg_gen_l:
7227  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
7228  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
7229 
7230  case NVPTX::BI__nvvm_atom_max_gen_i:
7231  case NVPTX::BI__nvvm_atom_max_gen_l:
7232  case NVPTX::BI__nvvm_atom_max_gen_ll:
7233  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
7234 
7235  case NVPTX::BI__nvvm_atom_max_gen_ui:
7236  case NVPTX::BI__nvvm_atom_max_gen_ul:
7237  case NVPTX::BI__nvvm_atom_max_gen_ull:
7238  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
7239 
7240  case NVPTX::BI__nvvm_atom_min_gen_i:
7241  case NVPTX::BI__nvvm_atom_min_gen_l:
7242  case NVPTX::BI__nvvm_atom_min_gen_ll:
7243  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
7244 
7245  case NVPTX::BI__nvvm_atom_min_gen_ui:
7246  case NVPTX::BI__nvvm_atom_min_gen_ul:
7247  case NVPTX::BI__nvvm_atom_min_gen_ull:
7248  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
7249 
7250  case NVPTX::BI__nvvm_atom_cas_gen_i:
7251  case NVPTX::BI__nvvm_atom_cas_gen_l:
7252  case NVPTX::BI__nvvm_atom_cas_gen_ll:
7253  // __nvvm_atom_cas_gen_* should return the old value rather than the
7254  // success flag.
7255  return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
7256 
7257  case NVPTX::BI__nvvm_atom_add_gen_f: {
7258  Value *Ptr = EmitScalarExpr(E->getArg(0));
7259  Value *Val = EmitScalarExpr(E->getArg(1));
7260  // atomicrmw only deals with integer arguments, so we need to use
7261  // LLVM's nvvm_atomic_load_add_f32 intrinsic instead.
7262  Value *FnALAF32 =
7263  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
7264  return Builder.CreateCall(FnALAF32, {Ptr, Val});
7265  }
7266 
7267  default:
7268  return nullptr;
7269  }
7270 }
7271 
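A sketch of the float special case above (NVPTX target; helper names are illustrative). The integer forms become ordinary atomicrmw instructions, while the f32 form must go through the nvvm intrinsic:

int   add_i(int *p, int v)     { return __nvvm_atom_add_gen_i(p, v); } /* atomicrmw add, seq_cst */
float add_f(float *p, float v) { return __nvvm_atom_add_gen_f(p, v); } /* llvm.nvvm.atomic.load.add.f32 */
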
7272 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
7273  const CallExpr *E) {
7274  switch (BuiltinID) {
7275  case WebAssembly::BI__builtin_wasm_memory_size: {
7276  llvm::Type *ResultType = ConvertType(E->getType());
7277  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
7278  return Builder.CreateCall(Callee);
7279  }
7280  case WebAssembly::BI__builtin_wasm_grow_memory: {
7281  Value *X = EmitScalarExpr(E->getArg(0));
7282  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
7283  return Builder.CreateCall(Callee, X);
7284  }
7285 
7286  default:
7287  return nullptr;
7288  }
7289 }
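
A caller-side sketch for the two WebAssembly builtins (wasm target; the size type follows the target's pointer width, and both builtins are assumed to map one-to-one onto the overloaded llvm.wasm.* intrinsics as shown above):

#include <stddef.h>

size_t current_memory(void)      { return __builtin_wasm_memory_size(); }
void   grow_memory(size_t delta) { __builtin_wasm_grow_memory(delta); }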