llvm.org/doxygen/AMDGPUSubtarget_8cpp_source.html

//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// Implements the AMDGPU specific subclass of TargetSubtarget.

//

//===----------------------------------------------------------------------===//


#include "AMDGPUSubtarget.h"

#include "AMDGPUCallLowering.h"

#include "AMDGPUInstructionSelector.h"

#include "AMDGPULegalizerInfo.h"

#include "AMDGPURegisterBankInfo.h"

#include "R600Subtarget.h"

#include "SIMachineFunctionInfo.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"

#include "llvm/CodeGen/MachineScheduler.h"

#include "llvm/CodeGen/TargetFrameLowering.h"

#include "llvm/IR/DiagnosticInfo.h"

#include "llvm/IR/IntrinsicsAMDGPU.h"

#include "llvm/IR/IntrinsicsR600.h"

#include "llvm/IR/MDBuilder.h"

#include <algorithm>


using namespace llvm;


#define DEBUG_TYPE "amdgpu-subtarget"


/// Builds the common AMDGPU subtarget state, moving the given triple into
/// the TargetTriple member.
AMDGPUSubtarget::AMDGPUSubtarget(Triple TT)
    : TargetTriple(std::move(TT)) {
}


/// Reports whether real True16 instructions should be used. This requires
/// both hasTrue16BitInsts() and the EnableRealTrue16Insts flag.
bool AMDGPUSubtarget::useRealTrue16Insts() const {
  if (!hasTrue16BitInsts())
    return false;
  return EnableRealTrue16Insts;
}


// Computes the largest per-workgroup LDS allocation (in bytes) with which the
// given function can still reach an occupancy of NWaves waves per SIMD / EU.
// Only the function's *maximum* flat workgroup size is taken into account.
unsigned
AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
                                                 const Function &F) const {
  const unsigned LanesPerWave = getWavefrontSize();
  const unsigned MaxGroupSize = getFlatWorkGroupSizes(F).second;

  // Waves needed to hold one workgroup: rounded up, and never less than one.
  unsigned WavesPerGroup = (MaxGroupSize + LanesPerWave - 1) / LanesPerWave;
  if (WavesPerGroup == 0)
    WavesPerGroup = 1;

  // Workgroups that fit in NWaves wave slots on each EU of the CU; again
  // clamped so that the division below is always well defined.
  unsigned GroupsPerCU = (NWaves * getEUsPerCU()) / WavesPerGroup;
  if (GroupsPerCU == 0)
    GroupsPerCU = 1;

  // The LDS is shared evenly between the co-resident workgroups.
  return getLocalMemorySize() / GroupsPerCU;
}


// FIXME: Should return min,max range.

//

// Returns the maximum occupancy, in number of waves per SIMD / EU, that can

// be achieved when only the given function is running on the machine; and

// taking into account the overall number of wave slots, the (maximum) workgroup

// size, and the per-workgroup LDS allocation size.

unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,

  const Function &F) const {

  const unsigned MaxWorkGroupSize = getFlatWorkGroupSizes(F).second;

  const unsigned MaxWorkGroupsPerCu = getMaxWorkGroupsPerCU(MaxWorkGroupSize);

  if (!MaxWorkGroupsPerCu)

    return 0;


  const unsigned WaveSize = getWavefrontSize();


  // FIXME: Do we need to account for alignment requirement of LDS rounding the

  // size up?

  // Compute restriction based on LDS usage

  unsigned NumGroups = getLocalMemorySize() / (Bytes ? Bytes : 1u);


  // This can be queried with more LDS than is possible, so just assume the

  // worst.

  if (NumGroups == 0)

    return 1;


  NumGroups = std::min(MaxWorkGroupsPerCu, NumGroups);


  // Round to the number of waves per CU.

  const unsigned MaxGroupNumWaves = divideCeil(MaxWorkGroupSize, WaveSize);

  unsigned MaxWaves = NumGroups * MaxGroupNumWaves;


  // Number of waves per EU (SIMD).

  MaxWaves = divideCeil(MaxWaves, getEUsPerCU());


  // Clamp to the maximum possible number of waves.

  MaxWaves = std::min(MaxWaves, getMaxWavesPerEU());


  // FIXME: Needs to be a multiple of the group size?

  //MaxWaves = MaxGroupNumWaves * (MaxWaves / MaxGroupNumWaves);


  assert(MaxWaves > 0 && MaxWaves <= getMaxWavesPerEU() &&

         "computed invalid occupancy");

  return MaxWaves;

}


unsigned

AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {

  const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();

  return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());

}


/// Returns the default {minimum, maximum} flat work group size for the given
/// calling convention. The minimum is always 1. The maximum is the wavefront
/// size for the listed shader calling conventions, and the subtarget's
/// maximum flat work group size for everything else.
std::pair<unsigned, unsigned>
AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
  unsigned UpperBound;
  switch (CC) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
    UpperBound = getWavefrontSize();
    break;
  default:
    UpperBound = getMaxFlatWorkGroupSize();
    break;
  }
  return std::pair(1u, UpperBound);
}


/// Returns the {minimum, maximum} flat work group size for \p F.
///
/// The "amdgpu-flat-work-group-size" attribute is honoured when it is
/// well-formed (minimum <= maximum) and lies within the subtarget's limits;
/// otherwise the calling-convention default is returned.
std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
    const Function &F) const {
  // Calling-convention default, also used as the attribute's fallback value.
  std::pair<unsigned, unsigned> Fallback =
      getDefaultFlatWorkGroupSize(F.getCallingConv());

  // What the function asked for.
  std::pair<unsigned, unsigned> Wanted = AMDGPU::getIntegerPairAttribute(
      F, "amdgpu-flat-work-group-size", Fallback);

  // Reject a reversed range, or bounds outside the subtarget's
  // specification. The checks run in this order and short-circuit.
  const bool Rejected = Wanted.first > Wanted.second ||
                        Wanted.first < getMinFlatWorkGroupSize() ||
                        Wanted.second > getMaxFlatWorkGroupSize();
  if (Rejected)
    return Fallback;

  return Wanted;
}


/// Validates a requested {minimum, maximum} waves-per-EU pair against the
/// subtarget limits and the given flat work group sizes.
///
/// Returns \p Requested unchanged when it passes every check. Otherwise
/// returns the default pair: {minimum implied by the maximum flat work group
/// size, getMaxWavesPerEU()}.
std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU(
    std::pair<unsigned, unsigned> Requested,
    std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
  const unsigned MaxWaves = getMaxWavesPerEU();

  // The maximum flat work group size (e.g. from the
  // "amdgpu-flat-work-group-size" attribute) implies a minimum number of
  // waves per execution unit; that value becomes the default minimum.
  const unsigned ImpliedMin =
      getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second);
  std::pair<unsigned, unsigned> Fallback(ImpliedMin, MaxWaves);

  // The requested minimum must not exceed the requested maximum. A maximum
  // of zero skips this check.
  if (Requested.second != 0 && Requested.second < Requested.first)
    return Fallback;

  // The request must stay within the subtarget's specification, and its
  // minimum must be compatible with the value implied by the flat work group
  // size.
  const bool OutOfSpec = Requested.first < getMinWavesPerEU() ||
                         Requested.second > MaxWaves ||
                         Requested.first < ImpliedMin;
  if (OutOfSpec)
    return Fallback;

  return Requested;
}


/// Reads the "amdgpu-waves-per-eu" attribute of \p F (falling back to
/// {1, getMaxWavesPerEU()}) and passes it through getEffectiveWavesPerEU()
/// together with \p FlatWorkGroupSizes.
std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
    const Function &F, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
  // Value used when the attribute is absent.
  std::pair<unsigned, unsigned> FullRange(1, getMaxWavesPerEU());

  // The trailing 'true' is getIntegerPairAttribute's OnlyFirstRequired flag.
  return getEffectiveWavesPerEU(
      AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", FullRange,
                                      true),
      FlatWorkGroupSizes);
}


/// Returns operand \p Dim of the kernel's "reqd_work_group_size" metadata as
/// an unsigned value. Returns std::numeric_limits<unsigned>::max() when the
/// metadata is missing or does not have exactly three operands.
///
/// \p Dim is used as the operand index without further checking.
static unsigned getReqdWorkGroupSize(const Function &Kernel, unsigned Dim) {
  auto *SizeMD = Kernel.getMetadata("reqd_work_group_size");
  if (!SizeMD || SizeMD->getNumOperands() != 3)
    return std::numeric_limits<unsigned>::max();

  auto *Size = mdconst::extract<ConstantInt>(SizeMD->getOperand(Dim));
  return Size->getZExtValue();
}


/// Returns true when targeting the Mesa3D OS and \p F does not use a shader
/// calling convention.
bool AMDGPUSubtarget::isMesaKernel(const Function &F) const {
  if (!isMesa3DOS())
    return false;
  const bool IsShader = AMDGPU::isShader(F.getCallingConv());
  return !IsShader;
}


/// Returns the largest workitem ID possible in \p Kernel for the given
/// dimension. This is the required work group size for that dimension minus
/// one when "reqd_work_group_size" metadata is available, and otherwise the
/// maximum flat work group size minus one.
unsigned AMDGPUSubtarget::getMaxWorkitemID(const Function &Kernel,
                                           unsigned Dimension) const {
  const unsigned Unknown = std::numeric_limits<unsigned>::max();
  const unsigned Required = getReqdWorkGroupSize(Kernel, Dimension);

  const unsigned Size =
      Required == Unknown ? getFlatWorkGroupSizes(Kernel).second : Required;
  return Size - 1;
}


/// Returns true when the maximum workitem ID is zero in each of the three
/// dimensions (0, 1 and 2) of \p Func.
bool AMDGPUSubtarget::isSingleLaneExecution(const Function &Func) const {
  // Checked in dimension order; stops at the first non-zero maximum ID.
  return getMaxWorkitemID(Func, 0) == 0 &&
         getMaxWorkitemID(Func, 1) == 0 &&
         getMaxWorkitemID(Func, 2) == 0;
}


/// Attaches a value range to \p I, which is expected to be a workitem-id or
/// local-size query (intrinsic call or load).
///
/// The range defaults to what the maximum flat work group size of the
/// enclosing function allows. If \p I is a call to one of the recognised
/// workitem-id / local-size intrinsics and the function carries
/// "reqd_work_group_size" metadata, the range is narrowed to that size.
///
/// For call instructions the range is added as a return attribute; for any
/// other instruction it is set as !range metadata.
///
/// \returns false (leaving \p I untouched) if the maximum size is zero, true
/// otherwise.
bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
  Function *Kernel = I->getParent()->getParent();
  unsigned MinSize = 0;
  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
  bool IdQuery = false;

  // If reqd_work_group_size is present it narrows value down.
  if (auto *CI = dyn_cast<CallInst>(I)) {
    const Function *F = CI->getCalledFunction();
    if (F) {
      // Sentinel meaning "not one of the recognised intrinsics".
      unsigned Dim = std::numeric_limits<unsigned>::max();
      switch (F->getIntrinsicID()) {
      case Intrinsic::amdgcn_workitem_id_x:
      case Intrinsic::r600_read_tidig_x:
        IdQuery = true;
        [[fallthrough]];
      case Intrinsic::r600_read_local_size_x:
        Dim = 0;
        break;
      case Intrinsic::amdgcn_workitem_id_y:
      case Intrinsic::r600_read_tidig_y:
        IdQuery = true;
        [[fallthrough]];
      case Intrinsic::r600_read_local_size_y:
        Dim = 1;
        break;
      case Intrinsic::amdgcn_workitem_id_z:
      case Intrinsic::r600_read_tidig_z:
        IdQuery = true;
        [[fallthrough]];
      case Intrinsic::r600_read_local_size_z:
        Dim = 2;
        break;
      default:
        break;
      }

      // Only dimensions 0, 1 and 2 exist: getReqdWorkGroupSize() indexes a
      // three-operand metadata node with Dim, so 3 must not be accepted.
      if (Dim < 3) {
        unsigned ReqdSize = getReqdWorkGroupSize(*Kernel, Dim);
        if (ReqdSize != std::numeric_limits<unsigned>::max())
          MinSize = MaxSize = ReqdSize;
      }
    }
  }

  if (!MaxSize)
    return false;

  // Range metadata is [Lo, Hi). For ID query we need to pass max size
  // as Hi. For size query we need to pass Hi + 1.
  if (IdQuery)
    MinSize = 0;
  else
    ++MaxSize;

  APInt Lower{32, MinSize};
  APInt Upper{32, MaxSize};
  if (auto *CI = dyn_cast<CallBase>(I)) {
    ConstantRange Range(Lower, Upper);
    CI->addRangeRetAttr(Range);
  } else {
    MDBuilder MDB(I->getContext());
    MDNode *MaxWorkGroupSizeRange = MDB.createRange(Lower, Upper);
    I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
  }
  return true;
}


unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {

  assert(AMDGPU::isKernel(F.getCallingConv()));


  // We don't allocate the segment if we know the implicit arguments weren't

  // used, even if the ABI implies we need them.

  if (F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))

    return 0;


  if (isMesaKernel(F))

    return 16;


  // Assume all implicit inputs are used by default

  const Module *M = F.getParent();

  unsigned NBytes =

      AMDGPU::getAMDHSACodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5 ? 256 : 56;

  return F.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes",

                                         NBytes);

}


/// Computes the number of bytes occupied by the explicit arguments of kernel
/// \p F, laying each argument out at its alignment in declaration order.
/// Arguments carrying the "amdgpu-hidden-argument" attribute are skipped.
///
/// \param [out] MaxAlign receives the largest alignment among the counted
/// arguments (at least 1).
uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
                                                 Align &MaxAlign) const {
  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
         F.getCallingConv() == CallingConv::SPIR_KERNEL);

  const DataLayout &Layout = F.getDataLayout();
  uint64_t RunningSize = 0;
  MaxAlign = Align(1);

  for (const Argument &Arg : F.args()) {
    if (!Arg.hasAttribute("amdgpu-hidden-argument")) {
      // byref arguments are sized and aligned by their pointee type and the
      // parameter alignment; everything else by the argument's own type.
      const bool ByRef = Arg.hasByRefAttr();
      Type *Ty = ByRef ? Arg.getParamByRefType() : Arg.getType();
      Align ArgAlign = Layout.getValueOrABITypeAlignment(
          ByRef ? Arg.getParamAlign() : std::nullopt, Ty);

      // Place the argument at the next suitably aligned offset.
      uint64_t ArgBytes = Layout.getTypeAllocSize(Ty);
      uint64_t ArgOffset = alignTo(RunningSize, ArgAlign);
      RunningSize = ArgOffset + ArgBytes;

      if (MaxAlign < ArgAlign)
        MaxAlign = ArgAlign;
    }
  }

  return RunningSize;
}


unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,

                                                Align &MaxAlign) const {

  if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL &&

      F.getCallingConv() != CallingConv::SPIR_KERNEL)

    return 0;


  uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);


  unsigned ExplicitOffset = getExplicitKernelArgOffset();


  uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;

  unsigned ImplicitBytes = getImplicitArgNumBytes(F);

  if (ImplicitBytes != 0) {

    const Align Alignment = getAlignmentForImplicitArgPtr();

    TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;

    MaxAlign = std::max(MaxAlign, Alignment);

  }


  // Being able to dereference past the end is useful for emitting scalar loads.

  return alignTo(TotalSize, 4);

}


/// Selects the DWARF flavour from the wavefront size: Wave32 for a wavefront
/// size of 32, Wave64 for anything else.
AMDGPUDwarfFlavour AMDGPUSubtarget::getAMDGPUDwarfFlavour() const {
  if (getWavefrontSize() == 32)
    return AMDGPUDwarfFlavour::Wave32;
  return AMDGPUDwarfFlavour::Wave64;
}


/// Returns the AMDGPUSubtarget of \p MF: the GCNSubtarget when the target
/// triple's architecture is amdgcn, the R600Subtarget otherwise.
const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
  const bool IsGCN =
      MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn;
  if (!IsGCN)
    return static_cast<const AMDGPUSubtarget &>(
        MF.getSubtarget<R600Subtarget>());
  return static_cast<const AMDGPUSubtarget &>(MF.getSubtarget<GCNSubtarget>());
}


/// Returns the AMDGPUSubtarget that \p TM provides for \p F: the GCNSubtarget
/// when the target triple's architecture is amdgcn, the R600Subtarget
/// otherwise.
const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM,
                                            const Function &F) {
  const bool IsGCN = TM.getTargetTriple().getArch() == Triple::amdgcn;
  if (!IsGCN)
    return static_cast<const AMDGPUSubtarget &>(
        TM.getSubtarget<R600Subtarget>(F));
  return static_cast<const AMDGPUSubtarget &>(
      TM.getSubtarget<GCNSubtarget>(F));
}


// FIXME: This has no reason to be in subtarget
//
// Reads the "amdgpu-max-num-workgroups" attribute of F as a vector of three
// values, using the maximum uint32_t value as the default.
SmallVector<unsigned>
AMDGPUSubtarget::getMaxNumWorkGroups(const Function &F) const {
  const unsigned NumEntries = 3;
  const unsigned DefaultVal = std::numeric_limits<uint32_t>::max();
  return AMDGPU::getIntegerVecAttribute(F, "amdgpu-max-num-workgroups",
                                        NumEntries, DefaultVal);
}

AMDGPUBaseInfo.h

AMDGPUCallLowering.h
This file describes how to lower LLVM calls to machine code calls.

AMDGPUInstructionSelector.h
This file declares the targeting of the InstructionSelector class for AMDGPU.

AMDGPULegalizerInfo.h
This file declares the targeting of the MachineLegalizer class for AMDGPU.

AMDGPURegisterBankInfo.h
This file declares the targeting of the RegisterBankInfo class for AMDGPU.

getReqdWorkGroupSize
static unsigned getReqdWorkGroupSize(const Function &Kernel, unsigned Dim)
Definition: AMDGPUSubtarget.cpp:189

AMDGPUSubtarget.h
Base class for AMDGPU specific classes of TargetSubtarget.

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: ARMSLSHardening.cpp:73

DiagnosticInfo.h

InlineAsmLowering.h
This file describes how to lower LLVM inline asm to machine code INLINEASM.

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

MDBuilder.h

MachineScheduler.h

Range
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

if
if(PassOpts->AAPipeline)
Definition: PassBuilderBindings.cpp:64

R600Subtarget.h
AMDGPU R600 specific subclass of TargetSubtarget.

CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

SIMachineFunctionInfo.h

TargetFrameLowering.h

Node
Definition: ItaniumDemangle.h:163

llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29

llvm::AMDGPUSubtarget::getOccupancyWithLocalMemSize
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemSizeWithWaveCount.
Definition: AMDGPUSubtarget.cpp:64

llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition: AMDGPUSubtarget.h:144

llvm::AMDGPUSubtarget::getDefaultFlatWorkGroupSize
std::pair< unsigned, unsigned > getDefaultFlatWorkGroupSize(CallingConv::ID CC) const
Definition: AMDGPUSubtarget.cpp:110

llvm::AMDGPUSubtarget::EnableRealTrue16Insts
bool EnableRealTrue16Insts
Definition: AMDGPUSubtarget.h:60

llvm::AMDGPUSubtarget::getAlignmentForImplicitArgPtr
Align getAlignmentForImplicitArgPtr() const
Definition: AMDGPUSubtarget.h:278

llvm::AMDGPUSubtarget::getEUsPerCU
unsigned getEUsPerCU() const
Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the "CU" is the unit onto whic...
Definition: AMDGPUSubtarget.h:276

llvm::AMDGPUSubtarget::isMesaKernel
bool isMesaKernel(const Function &F) const
Definition: AMDGPUSubtarget.cpp:196

llvm::AMDGPUSubtarget::getWavesPerEU
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
Definition: AMDGPUSubtarget.h:109

llvm::AMDGPUSubtarget::useRealTrue16Insts
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
Definition: AMDGPUSubtarget.cpp:37

llvm::AMDGPUSubtarget::getMinWavesPerEU
virtual unsigned getMinWavesPerEU() const =0

llvm::AMDGPUSubtarget::getFlatWorkGroupSizes
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
Definition: AMDGPUSubtarget.cpp:124

llvm::AMDGPUSubtarget::makeLIDRangeMetadata
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on a workitemid.* intrinsic call or load.
Definition: AMDGPUSubtarget.cpp:217

llvm::AMDGPUSubtarget::getMaxWorkitemID
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
Definition: AMDGPUSubtarget.cpp:200

llvm::AMDGPUSubtarget::getImplicitArgNumBytes
unsigned getImplicitArgNumBytes(const Function &F) const
Definition: AMDGPUSubtarget.cpp:285

llvm::AMDGPUSubtarget::getLocalMemorySize
unsigned getLocalMemorySize() const
Return the maximum number of bytes of LDS available for all workgroups running on the same WGP or CU.
Definition: AMDGPUSubtarget.h:261

llvm::AMDGPUSubtarget::getMaxNumWorkGroups
SmallVector< unsigned > getMaxNumWorkGroups(const Function &F) const
Return the number of work groups for the function.
Definition: AMDGPUSubtarget.cpp:371

llvm::AMDGPUSubtarget::getWavesPerEUForWorkGroup
virtual unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const =0

llvm::AMDGPUSubtarget::getMaxWorkGroupsPerCU
virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const =0

llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:329

llvm::AMDGPUSubtarget::hasTrue16BitInsts
bool hasTrue16BitInsts() const
Return true if the subtarget supports True16 instructions.
Definition: AMDGPUSubtarget.h:167

llvm::AMDGPUSubtarget::getAMDGPUDwarfFlavour
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const
Definition: AMDGPUSubtarget.cpp:351

llvm::AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.
Definition: AMDGPUSubtarget.cpp:45

llvm::AMDGPUSubtarget::getMaxFlatWorkGroupSize
virtual unsigned getMaxFlatWorkGroupSize() const =0

llvm::AMDGPUSubtarget::AMDGPUSubtarget
AMDGPUSubtarget(Triple TT)
Definition: AMDGPUSubtarget.cpp:35

llvm::AMDGPUSubtarget::getExplicitKernelArgOffset
unsigned getExplicitKernelArgOffset() const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
Definition: AMDGPUSubtarget.h:284

llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition: AMDGPUSubtarget.h:321

llvm::AMDGPUSubtarget::getExplicitKernArgSize
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const
Definition: AMDGPUSubtarget.cpp:304

llvm::AMDGPUSubtarget::getEffectiveWavesPerEU
std::pair< unsigned, unsigned > getEffectiveWavesPerEU(std::pair< unsigned, unsigned > WavesPerEU, std::pair< unsigned, unsigned > FlatWorkGroupSizes) const
Definition: AMDGPUSubtarget.cpp:147

llvm::AMDGPUSubtarget::isSingleLaneExecution
bool isSingleLaneExecution(const Function &Kernel) const
Return true if only a single workitem can be active in a wave.
Definition: AMDGPUSubtarget.cpp:208

llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:356

llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition: AMDGPUSubtarget.h:249

llvm::AMDGPUSubtarget::getMinFlatWorkGroupSize
virtual unsigned getMinFlatWorkGroupSize() const =0

llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:78

llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31

llvm::ConstantRange
This class represents a range of values.
Definition: ConstantRange.h:47

llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:63

llvm::Function
Definition: Function.h:63

llvm::GCNSubtarget
Definition: GCNSubtarget.h:34

llvm::GlobalObject::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition: Value.h:565

llvm::Instruction
Definition: Instruction.h:68

llvm::MDBuilder
Definition: MDBuilder.h:36

llvm::MDBuilder::createRange
MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition: MDBuilder.cpp:95

llvm::MDNode
Metadata node.
Definition: Metadata.h:1069

llvm::MachineFunction
Definition: MachineFunction.h:258

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:714

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:685

llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:812

llvm::MachineFunction::getTarget
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:710

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65

llvm::R600Subtarget
Definition: R600Subtarget.h:29

llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:390

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196

llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77

llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition: TargetMachine.h:126

llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44

llvm::Triple::amdgcn
@ amdgcn
Definition: Triple.h:74

llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:383

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45

uint32_t

uint64_t

unsigned

llvm::AMDGPU::isKernel
LLVM_READNONE bool isKernel(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.h:1301

llvm::AMDGPU::getAMDHSACodeObjectVersion
unsigned getAMDHSACodeObjectVersion(const Module &M)
Definition: AMDGPUBaseInfo.cpp:172

llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:2041

llvm::AMDGPU::AMDHSA_COV5
@ AMDHSA_COV5
Definition: AMDGPUBaseInfo.h:56

llvm::AMDGPU::getIntegerVecAttribute
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
Definition: AMDGPUBaseInfo.cpp:1367

llvm::AMDGPU::getIntegerPairAttribute
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
Definition: AMDGPUBaseInfo.cpp:1332

llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition: CallingConv.h:188

llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200

llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:206

llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191

llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194

llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144

llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:218

llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:213

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::HexPrintStyle::Upper
@ Upper

llvm::HexPrintStyle::Lower
@ Lower

llvm::divideCeil
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:403

llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155

llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1873

llvm::InstructionUniformity::Default
@ Default
The result values are uniform if and only if all operands are uniform.

llvm::AMDGPUDwarfFlavour
AMDGPUDwarfFlavour
Definition: AMDGPUMCTargetDesc.h:32

llvm::Wave32
@ Wave32
Definition: AMDGPUMCTargetDesc.h:32

llvm::Wave64
@ Wave64
Definition: AMDGPUMCTargetDesc.h:32

std
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39